Diffstat (limited to 'compiler')
-rw-r--r--  compiler/Android.bp                             |   1
-rw-r--r--  compiler/common_compiler_test.cc                |   4
-rw-r--r--  compiler/dex/dex_to_dex_decompiler.cc           |   2
-rw-r--r--  compiler/driver/compiler_driver.cc              |  92
-rw-r--r--  compiler/driver/compiler_driver_test.cc         |   5
-rw-r--r--  compiler/intrinsics_list.h                      |   7
-rw-r--r--  compiler/oat_test.cc                            |  23
-rw-r--r--  compiler/oat_writer.cc                          |  41
-rw-r--r--  compiler/optimizing/bytecode_utils.h            | 180
-rw-r--r--  compiler/optimizing/code_generator.cc           |   5
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc     |   4
-rw-r--r--  compiler/optimizing/code_generator_mips.cc      |  90
-rw-r--r--  compiler/optimizing/code_generator_mips.h       |   1
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc    | 185
-rw-r--r--  compiler/optimizing/code_sinking.cc             | 403
-rw-r--r--  compiler/optimizing/code_sinking.h              |  48
-rw-r--r--  compiler/optimizing/common_dominator.h          |   6
-rw-r--r--  compiler/optimizing/induction_var_range.cc      | 107
-rw-r--r--  compiler/optimizing/induction_var_range.h       |  16
-rw-r--r--  compiler/optimizing/induction_var_range_test.cc | 112
-rw-r--r--  compiler/optimizing/inliner.cc                  |  28
-rw-r--r--  compiler/optimizing/inliner.h                   |   7
-rw-r--r--  compiler/optimizing/intrinsics.cc               | 109
-rw-r--r--  compiler/optimizing/intrinsics.h                |  33
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc           |  70
-rw-r--r--  compiler/optimizing/intrinsics_arm.h            |   1
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc         |  73
-rw-r--r--  compiler/optimizing/intrinsics_arm64.h          |   4
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc      |  72
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.h       |   1
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc          |  62
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc        |  61
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc           |  59
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc        |  60
-rw-r--r--  compiler/optimizing/load_store_elimination.cc   | 109
-rw-r--r--  compiler/optimizing/loop_optimization.cc        |   3
-rw-r--r--  compiler/optimizing/loop_optimization.h         |   9
-rw-r--r--  compiler/optimizing/loop_optimization_test.cc   |   2
-rw-r--r--  compiler/optimizing/nodes.h                     |   3
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc      |  25
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h |  29
-rw-r--r--  compiler/optimizing/side_effects_analysis.h     |   6
-rw-r--r--  compiler/utils/arm/assembler_arm_vixl.h         |  10
-rw-r--r--  compiler/utils/mips/assembler_mips.cc           |   4
-rw-r--r--  compiler/utils/mips/assembler_mips.h            |  36
-rw-r--r--  compiler/utils/mips64/assembler_mips64.cc       | 161
-rw-r--r--  compiler/utils/mips64/assembler_mips64.h        | 224
-rw-r--r--  compiler/utils/mips64/assembler_mips64_test.cc  |  18
-rw-r--r--  compiler/utils/x86/assembler_x86.cc             |   8
-rw-r--r--  compiler/utils/x86/assembler_x86.h              |   1
-rw-r--r--  compiler/utils/x86/assembler_x86_test.cc        |   8
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc       |   9
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h        |   1
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64_test.cc  |   4
54 files changed, 2078 insertions(+), 564 deletions(-)
diff --git a/compiler/Android.bp b/compiler/Android.bp
index f5589cd7a3..1ee2a21b18 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -52,6 +52,7 @@ art_cc_defaults {
"optimizing/cha_guard_optimization.cc",
"optimizing/code_generator.cc",
"optimizing/code_generator_utils.cc",
+ "optimizing/code_sinking.cc",
"optimizing/constant_folding.cc",
"optimizing/dead_code_elimination.cc",
"optimizing/escape.cc",
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index d89cdbabf8..9a45379a05 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -52,10 +52,10 @@ void CommonCompilerTest::MakeExecutable(ArtMethod* method) {
compiler_driver_->GetCompiledMethod(MethodReference(&dex_file,
method->GetDexMethodIndex()));
}
- if (compiled_method != nullptr) {
+ // If the code size is 0, it means the method was skipped due to profile-guided compilation.
+ if (compiled_method != nullptr && compiled_method->GetQuickCode().size() != 0u) {
ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
uint32_t code_size = code.size();
- CHECK_NE(0u, code_size);
ArrayRef<const uint8_t> vmap_table = compiled_method->GetVmapTable();
uint32_t vmap_table_offset = vmap_table.empty() ? 0u
: sizeof(OatQuickMethodHeader) + vmap_table.size();
diff --git a/compiler/dex/dex_to_dex_decompiler.cc b/compiler/dex/dex_to_dex_decompiler.cc
index bfd485d126..53601033da 100644
--- a/compiler/dex/dex_to_dex_decompiler.cc
+++ b/compiler/dex/dex_to_dex_decompiler.cc
@@ -20,7 +20,7 @@
#include "base/mutex.h"
#include "dex_file-inl.h"
#include "dex_instruction-inl.h"
-#include "optimizing/bytecode_utils.h"
+#include "bytecode_utils.h"
namespace art {
namespace optimizer {
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 7e91453741..a5e4cb0877 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2283,7 +2283,7 @@ class InitializeClassVisitor : public CompilationVisitor {
public:
explicit InitializeClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {}
- virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
+ void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
ATRACE_CALL();
jobject jclass_loader = manager_->GetClassLoader();
const DexFile& dex_file = *manager_->GetDexFile();
@@ -2343,23 +2343,32 @@ class InitializeClassVisitor : public CompilationVisitor {
// mode which prevents the GC from visiting objects modified during the transaction.
// Ensure GC is not run so don't access freed objects when aborting transaction.
- ScopedAssertNoThreadSuspension ants("Transaction end");
- runtime->ExitTransactionMode();
+ {
+ ScopedAssertNoThreadSuspension ants("Transaction end");
+ runtime->ExitTransactionMode();
+
+ if (!success) {
+ CHECK(soa.Self()->IsExceptionPending());
+ mirror::Throwable* exception = soa.Self()->GetException();
+ VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
+ << exception->Dump();
+ std::ostream* file_log = manager_->GetCompiler()->
+ GetCompilerOptions().GetInitFailureOutput();
+ if (file_log != nullptr) {
+ *file_log << descriptor << "\n";
+ *file_log << exception->Dump() << "\n";
+ }
+ soa.Self()->ClearException();
+ transaction.Rollback();
+ CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
+ }
+ }
if (!success) {
- CHECK(soa.Self()->IsExceptionPending());
- mirror::Throwable* exception = soa.Self()->GetException();
- VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
- << exception->Dump();
- std::ostream* file_log = manager_->GetCompiler()->
- GetCompilerOptions().GetInitFailureOutput();
- if (file_log != nullptr) {
- *file_log << descriptor << "\n";
- *file_log << exception->Dump() << "\n";
- }
- soa.Self()->ClearException();
- transaction.Rollback();
- CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
+ // On failure, still intern the strings of static fields and those seen in <clinit>,
+ // as these will be created in the zygote. This is separated from the transaction
+ // code just above because we will allocate strings and so must be allowed to suspend.
+ InternStrings(klass, class_loader);
}
}
}
@@ -2375,6 +2384,57 @@ class InitializeClassVisitor : public CompilationVisitor {
}
private:
+ void InternStrings(Handle<mirror::Class> klass, Handle<mirror::ClassLoader> class_loader)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(manager_->GetCompiler()->GetCompilerOptions().IsBootImage());
+ DCHECK(klass->IsVerified());
+ DCHECK(!klass->IsInitialized());
+
+ StackHandleScope<1> hs(Thread::Current());
+ Handle<mirror::DexCache> h_dex_cache = hs.NewHandle(klass->GetDexCache());
+ const DexFile* dex_file = manager_->GetDexFile();
+ const DexFile::ClassDef* class_def = klass->GetClassDef();
+ ClassLinker* class_linker = manager_->GetClassLinker();
+
+ // Check encoded final field values for strings and intern.
+ annotations::RuntimeEncodedStaticFieldValueIterator value_it(*dex_file,
+ &h_dex_cache,
+ &class_loader,
+ manager_->GetClassLinker(),
+ *class_def);
+ for ( ; value_it.HasNext(); value_it.Next()) {
+ if (value_it.GetValueType() == annotations::RuntimeEncodedStaticFieldValueIterator::kString) {
+ // Resolve the string. This will intern the string.
+ art::ObjPtr<mirror::String> resolved = class_linker->ResolveString(
+ *dex_file, dex::StringIndex(value_it.GetJavaValue().i), h_dex_cache);
+ CHECK(resolved != nullptr);
+ }
+ }
+
+ // Intern strings seen in <clinit>.
+ ArtMethod* clinit = klass->FindClassInitializer(class_linker->GetImagePointerSize());
+ if (clinit != nullptr) {
+ const DexFile::CodeItem* code_item = clinit->GetCodeItem();
+ DCHECK(code_item != nullptr);
+ const Instruction* inst = Instruction::At(code_item->insns_);
+
+ const uint32_t insns_size = code_item->insns_size_in_code_units_;
+ for (uint32_t dex_pc = 0; dex_pc < insns_size;) {
+ if (inst->Opcode() == Instruction::CONST_STRING) {
+ ObjPtr<mirror::String> s = class_linker->ResolveString(
+ *dex_file, dex::StringIndex(inst->VRegB_21c()), h_dex_cache);
+ CHECK(s != nullptr);
+ } else if (inst->Opcode() == Instruction::CONST_STRING_JUMBO) {
+ ObjPtr<mirror::String> s = class_linker->ResolveString(
+ *dex_file, dex::StringIndex(inst->VRegB_31c()), h_dex_cache);
+ CHECK(s != nullptr);
+ }
+ dex_pc += inst->SizeInCodeUnits();
+ inst = inst->Next();
+ }
+ }
+ }
+
const ParallelCompilationManager* const manager_;
};
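
Note: the InternStrings() helper added above resolves, and thereby interns, the operand of every CONST_STRING / CONST_STRING_JUMBO in the <clinit> code item, so those strings land in the boot image even when the class initialization transaction is rolled back. A standalone sketch of the same linear scan, outside ART; the SizeInCodeUnitsOf callback stands in for Instruction::SizeInCodeUnits(), while the opcode values and formats (21c, 31c) are the real dex ones:

    #include <cstdint>
    #include <functional>
    #include <vector>

    // Collect the string indices referenced by const-string instructions in a
    // dex code item (a flat array of 16-bit code units).
    std::vector<uint32_t> CollectConstStringIndices(
        const uint16_t* insns,
        uint32_t insns_size_in_code_units,
        const std::function<uint32_t(const uint16_t*)>& SizeInCodeUnitsOf) {
      constexpr uint8_t kConstString = 0x1a;       // const-string, format 21c: 2 code units
      constexpr uint8_t kConstStringJumbo = 0x1b;  // const-string/jumbo, format 31c: 3 code units
      std::vector<uint32_t> indices;
      for (uint32_t dex_pc = 0u; dex_pc < insns_size_in_code_units; ) {
        const uint16_t* inst = insns + dex_pc;
        const uint8_t opcode = static_cast<uint8_t>(*inst & 0xffu);
        if (opcode == kConstString) {
          indices.push_back(inst[1]);  // string@BBBB: 16-bit index in the next code unit
        } else if (opcode == kConstStringJumbo) {
          // string@BBBBBBBB: 32-bit index, low half stored first.
          indices.push_back(inst[1] | (static_cast<uint32_t>(inst[2]) << 16));
        }
        dex_pc += SizeInCodeUnitsOf(inst);
      }
      return indices;
    }
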
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 97954f3c29..562f97b3ae 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -240,9 +240,8 @@ class CompilerDriverProfileTest : public CompilerDriverTest {
ProfileCompilationInfo info;
for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
- std::string key = ProfileCompilationInfo::GetProfileDexFileKey(dex_file->GetLocation());
- profile_info_.AddMethodIndex(key, dex_file->GetLocationChecksum(), 1);
- profile_info_.AddMethodIndex(key, dex_file->GetLocationChecksum(), 2);
+ profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);
+ profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2);
}
return &profile_info_;
}
diff --git a/compiler/intrinsics_list.h b/compiler/intrinsics_list.h
index 9bd25d8484..63c23cb074 100644
--- a/compiler/intrinsics_list.h
+++ b/compiler/intrinsics_list.h
@@ -24,6 +24,10 @@
// Note: adding a new intrinsic requires an art image version change,
// as the modifiers flag for some ArtMethods will need to be changed.
+// Note: j.l.Integer.valueOf says kNoThrow even though it could throw an OOME.
+// The kNoThrow should be renamed to kNoVisibleThrow, as it is ok to GVN Integer.valueOf
+// (kNoSideEffects), and it is also OK to remove it if it's unused.
+
#define INTRINSICS_LIST(V) \
V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToRawLongBits", "(D)J") \
V(DoubleDoubleToLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToLongBits", "(D)J") \
@@ -149,7 +153,8 @@
V(UnsafeLoadFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "loadFence", "()V") \
V(UnsafeStoreFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "storeFence", "()V") \
V(UnsafeFullFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "fullFence", "()V") \
- V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;")
+ V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;") \
+ V(IntegerValueOf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Integer;", "valueOf", "(I)Ljava/lang/Integer;")
#endif // ART_COMPILER_INTRINSICS_LIST_H_
#undef ART_COMPILER_INTRINSICS_LIST_H_ // #define is only for lint.
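
Note: INTRINSICS_LIST(V) is an X-macro, which is why the new IntegerValueOf intrinsic needs only the single V(...) row appended above; every consumer re-expands the same table with its own definition of V. A reduced sketch of the pattern (columns trimmed, names illustrative rather than the real ART definitions):

    // One table, many expansions.
    #define MY_INTRINSICS_LIST(V)                                               \
      V(DoubleDoubleToRawLongBits, "Ljava/lang/Double;", "doubleToRawLongBits") \
      V(IntegerValueOf, "Ljava/lang/Integer;", "valueOf")

    // Expansion 1: an enum with one entry per intrinsic.
    enum class Intrinsics {
      kNone,
    #define MAKE_ENUM(Name, Class, Method) k##Name,
      MY_INTRINSICS_LIST(MAKE_ENUM)
    #undef MAKE_ENUM
    };

    // Expansion 2: a parallel table of method names, kept in sync for free.
    static const char* const kIntrinsicMethodNames[] = {
      "<none>",
    #define MAKE_NAME(Name, Class, Method) Method,
      MY_INTRINSICS_LIST(MAKE_NAME)
    #undef MAKE_NAME
    };
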
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 66111f6e23..e2233e4bbd 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -265,6 +265,7 @@ class OatTest : public CommonCompilerTest {
void TestDexFileInput(bool verify, bool low_4gb, bool use_profile);
void TestZipFileInput(bool verify);
+ void TestZipFileInputWithEmptyDex();
std::unique_ptr<const InstructionSetFeatures> insn_features_;
std::unique_ptr<QuickCompilerCallbacks> callbacks_;
@@ -821,6 +822,28 @@ TEST_F(OatTest, ZipFileInputCheckVerifier) {
TestZipFileInput(true);
}
+void OatTest::TestZipFileInputWithEmptyDex() {
+ ScratchFile zip_file;
+ ZipBuilder zip_builder(zip_file.GetFile());
+ bool success = zip_builder.AddFile("classes.dex", nullptr, 0);
+ ASSERT_TRUE(success);
+ success = zip_builder.Finish();
+ ASSERT_TRUE(success) << strerror(errno);
+
+ SafeMap<std::string, std::string> key_value_store;
+ key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+ std::vector<const char*> input_filenames { zip_file.GetFilename().c_str() }; // NOLINT [readability/braces] [4]
+ ScratchFile oat_file, vdex_file(oat_file, ".vdex");
+ std::unique_ptr<ProfileCompilationInfo> profile_compilation_info(new ProfileCompilationInfo());
+ success = WriteElf(vdex_file.GetFile(), oat_file.GetFile(), input_filenames,
+ key_value_store, /*verify*/false, profile_compilation_info.get());
+ ASSERT_FALSE(success);
+}
+
+TEST_F(OatTest, ZipFileInputWithEmptyDex) {
+ TestZipFileInputWithEmptyDex();
+}
+
TEST_F(OatTest, UpdateChecksum) {
InstructionSet insn_set = kX86;
std::string error_msg;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 0ea11255a8..8ab44d2c19 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -104,6 +104,13 @@ inline uint32_t CodeAlignmentSize(uint32_t header_offset, const CompiledMethod&
// Defines the location of the raw dex file to write.
class OatWriter::DexFileSource {
public:
+ enum Type {
+ kNone,
+ kZipEntry,
+ kRawFile,
+ kRawData,
+ };
+
explicit DexFileSource(ZipEntry* zip_entry)
: type_(kZipEntry), source_(zip_entry) {
DCHECK(source_ != nullptr);
@@ -119,6 +126,7 @@ class OatWriter::DexFileSource {
DCHECK(source_ != nullptr);
}
+ Type GetType() const { return type_; }
bool IsZipEntry() const { return type_ == kZipEntry; }
bool IsRawFile() const { return type_ == kRawFile; }
bool IsRawData() const { return type_ == kRawData; }
@@ -147,13 +155,6 @@ class OatWriter::DexFileSource {
}
private:
- enum Type {
- kNone,
- kZipEntry,
- kRawFile,
- kRawData,
- };
-
Type type_;
const void* source_;
};
@@ -2259,16 +2260,38 @@ bool OatWriter::LayoutAndWriteDexFile(OutputStream* out, OatDexFile* oat_dex_fil
ZipEntry* zip_entry = oat_dex_file->source_.GetZipEntry();
std::unique_ptr<MemMap> mem_map(
zip_entry->ExtractToMemMap(location.c_str(), "classes.dex", &error_msg));
+ if (mem_map == nullptr) {
+ LOG(ERROR) << "Failed to extract dex file to mem map for layout: " << error_msg;
+ return false;
+ }
dex_file = DexFile::Open(location,
zip_entry->GetCrc32(),
std::move(mem_map),
/* verify */ true,
/* verify_checksum */ true,
&error_msg);
- } else {
- DCHECK(oat_dex_file->source_.IsRawFile());
+ } else if (oat_dex_file->source_.IsRawFile()) {
File* raw_file = oat_dex_file->source_.GetRawFile();
dex_file = DexFile::OpenDex(raw_file->Fd(), location, /* verify_checksum */ true, &error_msg);
+ } else {
+ // The source data is a vdex file.
+ CHECK(oat_dex_file->source_.IsRawData())
+ << static_cast<size_t>(oat_dex_file->source_.GetType());
+ const uint8_t* raw_dex_file = oat_dex_file->source_.GetRawData();
+ // Note: The raw data has already been checked to contain the header
+ // and all the data that the header specifies as the file size.
+ DCHECK(raw_dex_file != nullptr);
+ DCHECK(ValidateDexFileHeader(raw_dex_file, oat_dex_file->GetLocation()));
+ const UnalignedDexFileHeader* header = AsUnalignedDexFileHeader(raw_dex_file);
+ // Since the source may have had its layout changed, don't verify the checksum.
+ dex_file = DexFile::Open(raw_dex_file,
+ header->file_size_,
+ location,
+ oat_dex_file->dex_file_location_checksum_,
+ nullptr,
+ /* verify */ true,
+ /* verify_checksum */ false,
+ &error_msg);
}
if (dex_file == nullptr) {
LOG(ERROR) << "Failed to open dex file for layout: " << error_msg;
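
Note: hoisting enum Type into the public section and adding GetType() is what lets the rewritten LayoutAndWriteDexFile() above branch over all three source kinds and print the tag in its CHECK message. DexFileSource is a small hand-rolled tagged union over a type tag and an untyped pointer, roughly this shape (a sketch, not the full class):

    #include <cassert>
    #include <cstdint>

    class ZipEntry;  // opaque here
    class File;      // opaque here

    class DexFileSource {
     public:
      enum Type { kNone, kZipEntry, kRawFile, kRawData };

      explicit DexFileSource(ZipEntry* zip_entry) : type_(kZipEntry), source_(zip_entry) {}
      explicit DexFileSource(File* raw_file) : type_(kRawFile), source_(raw_file) {}
      explicit DexFileSource(const uint8_t* raw_data) : type_(kRawData), source_(raw_data) {}

      Type GetType() const { return type_; }

      ZipEntry* GetZipEntry() const {
        assert(type_ == kZipEntry);  // accessors are only valid for the matching tag
        return static_cast<ZipEntry*>(const_cast<void*>(source_));
      }
      // ... GetRawFile() / GetRawData() follow the same pattern ...

     private:
      Type type_;           // discriminates what source_ points to
      const void* source_;
    };
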
diff --git a/compiler/optimizing/bytecode_utils.h b/compiler/optimizing/bytecode_utils.h
deleted file mode 100644
index 133afa47fe..0000000000
--- a/compiler/optimizing/bytecode_utils.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
-#define ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
-
-#include "base/arena_object.h"
-#include "dex_file.h"
-#include "dex_file-inl.h"
-#include "dex_instruction-inl.h"
-
-namespace art {
-
-class CodeItemIterator : public ValueObject {
- public:
- explicit CodeItemIterator(const DexFile::CodeItem& code_item) : CodeItemIterator(code_item, 0u) {}
- CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc)
- : code_ptr_(code_item.insns_ + start_dex_pc),
- code_end_(code_item.insns_ + code_item.insns_size_in_code_units_),
- dex_pc_(start_dex_pc) {}
-
- bool Done() const { return code_ptr_ >= code_end_; }
- bool IsLast() const { return code_ptr_ + CurrentInstruction().SizeInCodeUnits() >= code_end_; }
-
- const Instruction& CurrentInstruction() const { return *Instruction::At(code_ptr_); }
- uint32_t CurrentDexPc() const { return dex_pc_; }
-
- void Advance() {
- DCHECK(!Done());
- size_t instruction_size = CurrentInstruction().SizeInCodeUnits();
- code_ptr_ += instruction_size;
- dex_pc_ += instruction_size;
- }
-
- private:
- const uint16_t* code_ptr_;
- const uint16_t* const code_end_;
- uint32_t dex_pc_;
-
- DISALLOW_COPY_AND_ASSIGN(CodeItemIterator);
-};
-
-class DexSwitchTable : public ValueObject {
- public:
- DexSwitchTable(const Instruction& instruction, uint32_t dex_pc)
- : instruction_(instruction),
- dex_pc_(dex_pc),
- sparse_(instruction.Opcode() == Instruction::SPARSE_SWITCH) {
- int32_t table_offset = instruction.VRegB_31t();
- const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
- DCHECK_EQ(table[0], sparse_ ? static_cast<uint16_t>(Instruction::kSparseSwitchSignature)
- : static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
- num_entries_ = table[1];
- values_ = reinterpret_cast<const int32_t*>(&table[2]);
- }
-
- uint16_t GetNumEntries() const {
- return num_entries_;
- }
-
- void CheckIndex(size_t index) const {
- if (sparse_) {
- // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
- DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
- } else {
- // In a packed table, we have the starting key and num_entries_ values.
- DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
- }
- }
-
- int32_t GetEntryAt(size_t index) const {
- CheckIndex(index);
- return values_[index];
- }
-
- uint32_t GetDexPcForIndex(size_t index) const {
- CheckIndex(index);
- return dex_pc_ +
- (reinterpret_cast<const int16_t*>(values_ + index) -
- reinterpret_cast<const int16_t*>(&instruction_));
- }
-
- // Index of the first value in the table.
- size_t GetFirstValueIndex() const {
- if (sparse_) {
- // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
- return num_entries_;
- } else {
- // In a packed table, we have the starting key and num_entries_ values.
- return 1;
- }
- }
-
- bool IsSparse() const { return sparse_; }
-
- bool ShouldBuildDecisionTree() {
- return IsSparse() || GetNumEntries() <= kSmallSwitchThreshold;
- }
-
- private:
- const Instruction& instruction_;
- const uint32_t dex_pc_;
-
- // Whether this is a sparse-switch table (or a packed-switch one).
- const bool sparse_;
-
- // This can't be const as it needs to be computed off of the given instruction, and complicated
- // expressions in the initializer list seemed very ugly.
- uint16_t num_entries_;
-
- const int32_t* values_;
-
- // The number of entries in a packed switch before we use a jump table or specified
- // compare/jump series.
- static constexpr uint16_t kSmallSwitchThreshold = 3;
-
- DISALLOW_COPY_AND_ASSIGN(DexSwitchTable);
-};
-
-class DexSwitchTableIterator {
- public:
- explicit DexSwitchTableIterator(const DexSwitchTable& table)
- : table_(table),
- num_entries_(static_cast<size_t>(table_.GetNumEntries())),
- first_target_offset_(table_.GetFirstValueIndex()),
- index_(0u) {}
-
- bool Done() const { return index_ >= num_entries_; }
- bool IsLast() const { return index_ == num_entries_ - 1; }
-
- void Advance() {
- DCHECK(!Done());
- index_++;
- }
-
- int32_t CurrentKey() const {
- return table_.IsSparse() ? table_.GetEntryAt(index_) : table_.GetEntryAt(0) + index_;
- }
-
- int32_t CurrentTargetOffset() const {
- return table_.GetEntryAt(index_ + first_target_offset_);
- }
-
- uint32_t GetDexPcForCurrentIndex() const { return table_.GetDexPcForIndex(index_); }
-
- private:
- const DexSwitchTable& table_;
- const size_t num_entries_;
- const size_t first_target_offset_;
-
- size_t index_;
-};
-
-inline const Instruction& GetDexInstructionAt(const DexFile::CodeItem& code_item, uint32_t dex_pc) {
- return CodeItemIterator(code_item, dex_pc).CurrentInstruction();
-}
-
-inline bool IsThrowingDexInstruction(const Instruction& instruction) {
- // Special-case MONITOR_EXIT which is a throwing instruction but the verifier
- // guarantees that it will never throw. This is necessary to avoid rejecting
- // 'synchronized' blocks/methods.
- return instruction.IsThrow() && instruction.Opcode() != Instruction::MONITOR_EXIT;
-}
-
-} // namespace art
-
-#endif // ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
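
Note: the header deleted above is presumably relocated rather than dropped, given that dex_to_dex_decompiler.cc now includes plain "bytecode_utils.h" earlier in this change. For reference, the CodeItemIterator it defines is used as a straight linear walk over a method's code units; a usage sketch assuming the definitions shown above:

    // Iterate over every instruction in a code item.
    void VisitAllInstructions(const DexFile::CodeItem& code_item) {
      for (CodeItemIterator it(code_item); !it.Done(); it.Advance()) {
        const Instruction& inst = it.CurrentInstruction();
        const uint32_t dex_pc = it.CurrentDexPc();
        // ... inspect `inst` at `dex_pc` ...
        static_cast<void>(inst);
        static_cast<void>(dex_pc);
      }
    }
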
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 8dd423fcbb..424b8507fb 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -861,8 +861,11 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
bool CodeGenerator::HasStackMapAtCurrentPc() {
uint32_t pc = GetAssembler()->CodeSize();
size_t count = stack_map_stream_.GetNumberOfStackMaps();
+ if (count == 0) {
+ return false;
+ }
CodeOffset native_pc_offset = stack_map_stream_.GetStackMap(count - 1).native_pc_code_offset;
- return (count > 0) && (native_pc_offset.Uint32Value(GetInstructionSet()) == pc);
+ return (native_pc_offset.Uint32Value(GetInstructionSet()) == pc);
}
void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction,
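
Note: the HasStackMapAtCurrentPc() change above fixes a latent out-of-bounds access, not just style. The old code called GetStackMap(count - 1) on the line before the return, so the (count > 0) term inside the returned expression never guarded the indexing, and an empty stream indexed with a wrapped-around count - 1. The hazard in miniature, with std::vector standing in for the stack map stream:

    #include <vector>

    // Old shape: the indexing happens before the guard is ever tested.
    bool LastEqualsBuggy(const std::vector<int>& v, int x) {
      int last = v[v.size() - 1];  // undefined behavior when v is empty: size() - 1 wraps
      return (v.size() > 0) && (last == x);
    }

    // Patched shape: bail out early, then index safely.
    bool LastEqualsFixed(const std::vector<int>& v, int x) {
      if (v.empty()) {
        return false;
      }
      return v[v.size() - 1] == x;
    }
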
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index edccbd4904..18c95b3c41 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -4094,7 +4094,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
}
void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
- IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
+ IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
@@ -4107,7 +4107,7 @@ void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* inv
// art::PrepareForRegisterAllocation.
DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
- IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
+ IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
if (intrinsic.TryDispatch(invoke)) {
return;
}
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index c9dde7cc55..51dd898a81 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -1899,9 +1899,9 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) {
}
}
-auto InstructionCodeGeneratorMIPS::GetImplicitNullChecker(HInstruction* instruction) {
- auto null_checker = [this, instruction]() {
- this->codegen_->MaybeRecordImplicitNullCheck(instruction);
+static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS* codegen) {
+ auto null_checker = [codegen, instruction]() {
+ codegen->MaybeRecordImplicitNullCheck(instruction);
};
return null_checker;
}
@@ -1911,7 +1911,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
Register obj = locations->InAt(0).AsRegister<Register>();
Location index = locations->InAt(1);
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
- auto null_checker = GetImplicitNullChecker(instruction);
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
Primitive::Type type = instruction->GetType();
const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
@@ -2073,6 +2073,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
+
+ if (type == Primitive::kPrimNot) {
+ Register out = locations->Out().AsRegister<Register>();
+ __ MaybeUnpoisonHeapReference(out);
+ }
}
void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) {
@@ -2143,7 +2148,7 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
bool needs_runtime_call = locations->WillCall();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
- auto null_checker = GetImplicitNullChecker(instruction);
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
Register base_reg = index.IsConstant() ? obj : TMP;
switch (value_type) {
@@ -2200,7 +2205,31 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
DCHECK(!needs_write_barrier);
} else {
Register value = value_location.AsRegister<Register>();
- __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+ if (kPoisonHeapReferences && needs_write_barrier) {
+ // Note that in the case where `value` is a null reference,
+ // we do not enter this block, as a null reference does not
+ // need poisoning.
+ DCHECK_EQ(value_type, Primitive::kPrimNot);
+ // Use Sw() instead of StoreToOffset() in order to be able to
+ // hold the poisoned reference in AT and thus avoid allocating
+ // yet another temporary register.
+ if (index.IsConstant()) {
+ if (!IsInt<16>(static_cast<int32_t>(data_offset))) {
+ int16_t low = Low16Bits(data_offset);
+ uint32_t high = data_offset - low;
+ __ Addiu32(TMP, obj, high);
+ base_reg = TMP;
+ data_offset = low;
+ }
+ } else {
+ DCHECK(IsInt<16>(static_cast<int32_t>(data_offset)));
+ }
+ __ PoisonHeapReference(AT, value);
+ __ Sw(AT, base_reg, data_offset);
+ null_checker();
+ } else {
+ __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+ }
if (needs_write_barrier) {
DCHECK_EQ(value_type, Primitive::kPrimNot);
codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
@@ -2208,6 +2237,8 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
}
} else {
DCHECK_EQ(value_type, Primitive::kPrimNot);
+ // Note: if heap poisoning is enabled, pAputObject takes care
+ // of poisoning the reference.
codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
}
@@ -2322,6 +2353,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
__ Beqz(obj, slow_path->GetExitLabel());
// Compare the class of `obj` with `cls`.
__ LoadFromOffset(kLoadWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value());
+ __ MaybeUnpoisonHeapReference(obj_cls);
__ Bne(obj_cls, cls, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
@@ -4891,7 +4923,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
LoadOperandType load_type = kLoadUnsignedByte;
bool is_volatile = field_info.IsVolatile();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
- auto null_checker = GetImplicitNullChecker(instruction);
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
switch (type) {
case Primitive::kPrimBoolean:
@@ -4958,6 +4990,9 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
dst = locations->Out().AsRegister<Register>();
}
__ LoadFromOffset(load_type, dst, obj, offset, null_checker);
+ if (type == Primitive::kPrimNot) {
+ __ MaybeUnpoisonHeapReference(dst);
+ }
} else {
DCHECK(locations->Out().IsFpuRegister());
FRegister dst = locations->Out().AsFpuRegister<FRegister>();
@@ -5016,7 +5051,8 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
StoreOperandType store_type = kStoreByte;
bool is_volatile = field_info.IsVolatile();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
- auto null_checker = GetImplicitNullChecker(instruction);
+ bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1));
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
switch (type) {
case Primitive::kPrimBoolean:
@@ -5089,7 +5125,16 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
} else {
src = value_location.AsRegister<Register>();
}
- __ StoreToOffset(store_type, src, obj, offset, null_checker);
+ if (kPoisonHeapReferences && needs_write_barrier) {
+ // Note that in the case where `value` is a null reference,
+ // we do not enter this block, as a null reference does not
+ // need poisoning.
+ DCHECK_EQ(type, Primitive::kPrimNot);
+ __ PoisonHeapReference(TMP, src);
+ __ StoreToOffset(store_type, TMP, obj, offset, null_checker);
+ } else {
+ __ StoreToOffset(store_type, src, obj, offset, null_checker);
+ }
} else {
FRegister src = value_location.AsFpuRegister<FRegister>();
if (type == Primitive::kPrimFloat) {
@@ -5101,7 +5146,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
}
// TODO: memory barriers?
- if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
+ if (needs_write_barrier) {
Register src = value_location.AsRegister<Register>();
codegen_->MarkGCCard(obj, src, value_can_be_null);
}
@@ -5173,6 +5218,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
// Compare the class of `obj` with `cls`.
__ LoadFromOffset(kLoadWord, out, obj, mirror::Object::ClassOffset().Int32Value());
+ __ MaybeUnpoisonHeapReference(out);
if (instruction->IsExactCheck()) {
// Classes must be equal for the instanceof to succeed.
__ Xor(out, out, cls);
@@ -5239,6 +5285,14 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke
__ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (the
+ // concurrent copying collector may not in the future).
+ __ MaybeUnpoisonHeapReference(temp);
__ LoadFromOffset(kLoadWord, temp, temp,
mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
@@ -5562,6 +5616,14 @@ void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location tem
// temp = object->GetClass();
__ LoadFromOffset(kLoadWord, temp, receiver, class_offset);
MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (the
+ // concurrent copying collector may not in the future).
+ __ MaybeUnpoisonHeapReference(temp);
// temp = temp->GetMethodAt(method_offset);
__ LoadFromOffset(kLoadWord, temp, temp, method_offset);
// T9 = temp->GetEntryPoint();
@@ -5692,7 +5754,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
bool reordering = __ SetReorder(false);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
- __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+ GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
__ SetReorder(reordering);
generate_null_check = true;
break;
@@ -5837,7 +5899,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
bool reordering = __ SetReorder(false);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
- __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+ GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
__ SetReorder(reordering);
SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
codegen_->AddSlowPath(slow_path);
@@ -6059,6 +6121,8 @@ void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) {
}
void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) {
+ // Note: if heap poisoning is enabled, the entry point takes care
+ // of poisoning the reference.
codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
}
@@ -6076,6 +6140,8 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) {
}
void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
+ // Note: if heap poisoning is enabled, the entry point takes care
+ // of poisoning the reference.
if (instruction->IsStringAlloc()) {
// String is allocated through StringFactory. Call NewEmptyString entry point.
Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
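
Note on two patterns recurring throughout the MIPS changes above. The null_checker threaded into the load/store emitters is a callback the assembler invokes immediately after emitting the possibly-faulting memory instruction, so the PC recorded for the implicit null check is exactly the faulting one. And GetImplicitNullChecker() becomes a static free function, presumably to keep the deduced lambda type out of the class declaration (the auto-returning member is removed from code_generator_mips.h just below). A simplified sketch with stand-in types, not the real ART signatures:

    // A free function returning a lambda, as in GetImplicitNullChecker().
    struct Instr {};
    struct Codegen {
      void MaybeRecordImplicitNullCheck(Instr* instruction) {
        static_cast<void>(instruction);  // real code records the current PC in a stack map
      }
    };

    static auto GetImplicitNullChecker(Instr* instruction, Codegen* codegen) {
      // Capture by value: both pointers outlive the emission of the code sequence.
      return [codegen, instruction]() {
        codegen->MaybeRecordImplicitNullCheck(instruction);
      };
    }

    // The emitter then calls the checker right after the access is emitted:
    template <typename NullChecker>
    void StoreWordToOffset(/* reg, base, offset, */ NullChecker&& null_checker) {
      // ... emit the store that may fault on a null base register ...
      null_checker();  // record the just-emitted PC as an implicit null-check site
    }
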
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 47eba50248..0ccd80ab93 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -297,7 +297,6 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
- auto GetImplicitNullChecker(HInstruction* instruction);
void GenPackedSwitchWithCompares(Register value_reg,
int32_t lower_bound,
uint32_t num_entries,
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5be0da4011..138ebe6a25 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -1483,11 +1483,19 @@ void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) {
}
}
+static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS64* codegen) {
+ auto null_checker = [codegen, instruction]() {
+ codegen->MaybeRecordImplicitNullCheck(instruction);
+ };
+ return null_checker;
+}
+
void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
Location index = locations->InAt(1);
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
Primitive::Type type = instruction->GetType();
const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
@@ -1498,10 +1506,10 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
+ __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset, null_checker);
} else {
__ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
- __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1511,10 +1519,10 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
+ __ LoadFromOffset(kLoadSignedByte, out, obj, offset, null_checker);
} else {
__ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
- __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1524,11 +1532,11 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
+ __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
__ Daddu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1537,8 +1545,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
if (maybe_compressed_char_at) {
uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
- __ LoadFromOffset(kLoadWord, TMP, obj, count_offset);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker);
__ Dext(TMP, TMP, 0, 1);
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
@@ -1563,7 +1570,8 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
__ LoadFromOffset(kLoadUnsignedHalfword,
out,
obj,
- data_offset + (const_index << TIMES_2));
+ data_offset + (const_index << TIMES_2),
+ null_checker);
}
} else {
GpuRegister index_reg = index.AsRegister<GpuRegister>();
@@ -1581,7 +1589,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
} else {
__ Dsll(TMP, index_reg, TIMES_2);
__ Daddu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker);
}
}
break;
@@ -1595,11 +1603,11 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ LoadFromOffset(load_type, out, obj, offset);
+ __ LoadFromOffset(load_type, out, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
__ Daddu(TMP, obj, TMP);
- __ LoadFromOffset(load_type, out, TMP, data_offset);
+ __ LoadFromOffset(load_type, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1609,11 +1617,11 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ LoadFromOffset(kLoadDoubleword, out, obj, offset);
+ __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
__ Daddu(TMP, obj, TMP);
- __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset);
+ __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1623,11 +1631,11 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ LoadFpuFromOffset(kLoadWord, out, obj, offset);
+ __ LoadFpuFromOffset(kLoadWord, out, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
__ Daddu(TMP, obj, TMP);
- __ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset);
+ __ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1637,11 +1645,11 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset);
+ __ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
__ Daddu(TMP, obj, TMP);
- __ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset);
+ __ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
}
break;
}
@@ -1650,8 +1658,10 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
- if (!maybe_compressed_char_at) {
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+
+ if (type == Primitive::kPrimNot) {
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ __ MaybeUnpoisonHeapReference(out);
}
}
@@ -1703,6 +1713,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
bool needs_runtime_call = locations->WillCall();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
switch (value_type) {
case Primitive::kPrimBoolean:
@@ -1712,10 +1723,10 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
- __ StoreToOffset(kStoreByte, value, obj, offset);
+ __ StoreToOffset(kStoreByte, value, obj, offset, null_checker);
} else {
__ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
- __ StoreToOffset(kStoreByte, value, TMP, data_offset);
+ __ StoreToOffset(kStoreByte, value, TMP, data_offset, null_checker);
}
break;
}
@@ -1727,11 +1738,11 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
- __ StoreToOffset(kStoreHalfword, value, obj, offset);
+ __ StoreToOffset(kStoreHalfword, value, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
__ Daddu(TMP, obj, TMP);
- __ StoreToOffset(kStoreHalfword, value, TMP, data_offset);
+ __ StoreToOffset(kStoreHalfword, value, TMP, data_offset, null_checker);
}
break;
}
@@ -1740,24 +1751,59 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimNot: {
if (!needs_runtime_call) {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ GpuRegister base_reg;
GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
if (index.IsConstant()) {
- size_t offset =
- (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ StoreToOffset(kStoreWord, value, obj, offset);
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+ base_reg = obj;
} else {
DCHECK(index.IsRegister()) << index;
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
__ Daddu(TMP, obj, TMP);
- __ StoreToOffset(kStoreWord, value, TMP, data_offset);
+ base_reg = TMP;
+ }
+ if (kPoisonHeapReferences && needs_write_barrier) {
+ // Note that in the case where `value` is a null reference,
+ // we do not enter this block, as a null reference does not
+ // need poisoning.
+ DCHECK_EQ(value_type, Primitive::kPrimNot);
+ // Use Sw() instead of StoreToOffset() in order to be able to
+ // hold the poisoned reference in AT and thus avoid allocating
+ // yet another temporary register.
+ if (index.IsConstant()) {
+ if (!IsInt<16>(static_cast<int32_t>(data_offset))) {
+ int16_t low16 = Low16Bits(data_offset);
+ // For consistency with StoreToOffset() and such, treat data_offset as int32_t.
+ uint64_t high48 = static_cast<uint64_t>(static_cast<int32_t>(data_offset)) - low16;
+ int16_t upper16 = High16Bits(high48);
+ // Allow the full [-2GB,+2GB) range in case `low16` is negative and needs a
+ // compensatory 64KB added, which may push `high48` above 2GB and require
+ // the dahi instruction.
+ int16_t higher16 = High32Bits(high48) + ((upper16 < 0) ? 1 : 0);
+ __ Daui(TMP, obj, upper16);
+ if (higher16 != 0) {
+ __ Dahi(TMP, higher16);
+ }
+ base_reg = TMP;
+ data_offset = low16;
+ }
+ } else {
+ DCHECK(IsInt<16>(static_cast<int32_t>(data_offset)));
+ }
+ __ PoisonHeapReference(AT, value);
+ __ Sw(AT, base_reg, data_offset);
+ null_checker();
+ } else {
+ __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
if (needs_write_barrier) {
DCHECK_EQ(value_type, Primitive::kPrimNot);
codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
}
} else {
DCHECK_EQ(value_type, Primitive::kPrimNot);
+ // Note: if heap poisoning is enabled, pAputObject takes care
+ // of poisoning the reference.
codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
}
@@ -1770,11 +1816,11 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ StoreToOffset(kStoreDoubleword, value, obj, offset);
+ __ StoreToOffset(kStoreDoubleword, value, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
__ Daddu(TMP, obj, TMP);
- __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset);
+ __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset, null_checker);
}
break;
}
@@ -1786,11 +1832,11 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
- __ StoreFpuToOffset(kStoreWord, value, obj, offset);
+ __ StoreFpuToOffset(kStoreWord, value, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
__ Daddu(TMP, obj, TMP);
- __ StoreFpuToOffset(kStoreWord, value, TMP, data_offset);
+ __ StoreFpuToOffset(kStoreWord, value, TMP, data_offset, null_checker);
}
break;
}
@@ -1802,11 +1848,11 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
- __ StoreFpuToOffset(kStoreDoubleword, value, obj, offset);
+ __ StoreFpuToOffset(kStoreDoubleword, value, obj, offset, null_checker);
} else {
__ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
__ Daddu(TMP, obj, TMP);
- __ StoreFpuToOffset(kStoreDoubleword, value, TMP, data_offset);
+ __ StoreFpuToOffset(kStoreDoubleword, value, TMP, data_offset, null_checker);
}
break;
}
@@ -1815,11 +1861,6 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
-
- // Ints and objects are handled in the switch.
- if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) {
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- }
}
void LocationsBuilderMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -1871,6 +1912,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
__ Beqzc(obj, slow_path->GetExitLabel());
// Compare the class of `obj` with `cls`.
__ LoadFromOffset(kLoadUnsignedWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value());
+ __ MaybeUnpoisonHeapReference(obj_cls);
__ Bnec(obj_cls, cls, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
@@ -3086,6 +3128,9 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction,
LocationSummary* locations = instruction->GetLocations();
GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
LoadOperandType load_type = kLoadUnsignedByte;
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
+
switch (type) {
case Primitive::kPrimBoolean:
load_type = kLoadUnsignedByte;
@@ -3117,15 +3162,18 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction,
if (!Primitive::IsFloatingPointType(type)) {
DCHECK(locations->Out().IsRegister());
GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
- __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
+ __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
} else {
DCHECK(locations->Out().IsFpuRegister());
FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
- __ LoadFpuFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
+ __ LoadFpuFromOffset(load_type, dst, obj, offset, null_checker);
}
-
- codegen_->MaybeRecordImplicitNullCheck(instruction);
// TODO: memory barrier?
+
+ if (type == Primitive::kPrimNot) {
+ GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
+ __ MaybeUnpoisonHeapReference(dst);
+ }
}
void LocationsBuilderMIPS64::HandleFieldSet(HInstruction* instruction,
@@ -3147,6 +3195,10 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
LocationSummary* locations = instruction->GetLocations();
GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
StoreOperandType store_type = kStoreByte;
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+ bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1));
+ auto null_checker = GetImplicitNullChecker(instruction, codegen_);
+
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
@@ -3172,16 +3224,24 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
if (!Primitive::IsFloatingPointType(type)) {
DCHECK(locations->InAt(1).IsRegister());
GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
- __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
+ if (kPoisonHeapReferences && needs_write_barrier) {
+ // Note that in the case where `value` is a null reference,
+ // we do not enter this block, as a null reference does not
+ // need poisoning.
+ DCHECK_EQ(type, Primitive::kPrimNot);
+ __ PoisonHeapReference(TMP, src);
+ __ StoreToOffset(store_type, TMP, obj, offset, null_checker);
+ } else {
+ __ StoreToOffset(store_type, src, obj, offset, null_checker);
+ }
} else {
DCHECK(locations->InAt(1).IsFpuRegister());
FpuRegister src = locations->InAt(1).AsFpuRegister<FpuRegister>();
- __ StoreFpuToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
+ __ StoreFpuToOffset(store_type, src, obj, offset, null_checker);
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
// TODO: memory barriers?
- if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
+ if (needs_write_barrier) {
DCHECK(locations->InAt(1).IsRegister());
GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
codegen_->MarkGCCard(obj, src, value_can_be_null);
@@ -3247,6 +3307,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
// Compare the class of `obj` with `cls`.
__ LoadFromOffset(kLoadUnsignedWord, out, obj, mirror::Object::ClassOffset().Int32Value());
+ __ MaybeUnpoisonHeapReference(out);
if (instruction->IsExactCheck()) {
// Classes must be equal for the instanceof to succeed.
__ Xor(out, out, cls);
@@ -3325,6 +3386,14 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo
__ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (the
+ // concurrent copying collector may not in the future).
+ __ MaybeUnpoisonHeapReference(temp);
__ LoadFromOffset(kLoadDoubleword, temp, temp,
mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
@@ -3567,6 +3636,14 @@ void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t
// temp = object->GetClass();
__ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (the
+ // concurrent copying collector may not in the future).
+ __ MaybeUnpoisonHeapReference(temp);
// temp = temp->GetMethodAt(method_offset);
__ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
// T9 = temp->GetEntryPoint();
@@ -3666,8 +3743,8 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
case HLoadClass::LoadKind::kBssEntry: {
CodeGeneratorMIPS64::PcRelativePatchInfo* info =
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
- codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
- __ Lwu(out, AT, /* placeholder */ 0x5678);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
+ GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
generate_null_check = true;
break;
}
@@ -3773,8 +3850,8 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS64::PcRelativePatchInfo* info =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
- codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
- __ Lwu(out, AT, /* placeholder */ 0x5678);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
+ GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
codegen_->AddSlowPath(slow_path);
__ Beqzc(out, slow_path->GetEntryLabel());
@@ -3944,6 +4021,8 @@ void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) {
+ // Note: if heap poisoning is enabled, the entry point takes care
+ // of poisoning the reference.
codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
}
@@ -3961,6 +4040,8 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
+ // Note: if heap poisoning is enabled, the entry point takes care
+ // of poisoning the reference.
if (instruction->IsStringAlloc()) {
// String is allocated through StringFactory. Call NewEmptyString entry point.
GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
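
Note: the low16/upper16/higher16 decomposition in the poisoned array-set path above is easy to verify in isolation. Low16Bits() is used sign-extended, so data_offset - low16 always has its low 16 bits clear, and when upper16 is negative its sign-extension through Daui borrows from bits 32 and up, which the +1 carried into higher16 (applied via Dahi) repays. A self-contained check under those definitions; the bit-slice expressions stand in for Low16Bits()/High16Bits()/High32Bits():

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    int main() {
      // For any 32-bit offset: obj + data_offset ==
      //   ((obj + sext(upper16 << 16)) + sext(higher16 << 32)) + sext(low16),
      // matching the semantics of Daui, Dahi and the 16-bit store offset.
      for (uint32_t data_offset : {0x12348000u, 0x7fff8000u, 0x00017fffu}) {
        const int16_t low16 = static_cast<int16_t>(data_offset & 0xffffu);
        const uint64_t high48 =
            static_cast<uint64_t>(static_cast<int32_t>(data_offset)) - low16;
        const int16_t upper16 = static_cast<int16_t>((high48 >> 16) & 0xffffu);
        const int16_t higher16 =
            static_cast<int16_t>((high48 >> 32) & 0xffffu) + ((upper16 < 0) ? 1 : 0);

        const int64_t obj = 0;  // any base value works; 0 keeps the arithmetic readable
        const int64_t after_daui = obj + (static_cast<int64_t>(upper16) << 16);  // Daui
        const int64_t after_dahi = after_daui + (static_cast<int64_t>(higher16) << 32);  // Dahi
        assert(after_dahi + low16 == obj + static_cast<int32_t>(data_offset));
      }
      return 0;
    }
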
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
new file mode 100644
index 0000000000..dc3d378e75
--- /dev/null
+++ b/compiler/optimizing/code_sinking.cc
@@ -0,0 +1,403 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_sinking.h"
+
+#include "common_dominator.h"
+#include "nodes.h"
+
+namespace art {
+
+void CodeSinking::Run() {
+ HBasicBlock* exit = graph_->GetExitBlock();
+ if (exit == nullptr) {
+ // Infinite loop, just bail.
+ return;
+ }
+ // TODO(ngeoffray): we do not profile branches yet, so use throw instructions
+ // as an indicator of an uncommon branch.
+ for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) {
+ if (exit_predecessor->GetLastInstruction()->IsThrow()) {
+ SinkCodeToUncommonBranch(exit_predecessor);
+ }
+ }
+}
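For a sense of what this targets, here is a hypothetical Java method (class and field names invented for illustration, and assuming the constructor call is inlined away or otherwise movable) where an allocation and its field store are only observable on a branch ending in a throw, so both can be sunk out of the common path:

    class MyError extends RuntimeException {
      int detail;  // plain field, so the store below is a simple InstanceFieldSet
    }

    static void check(boolean ok, int value) {
      MyError e = new MyError();  // NewInstance: a sinking candidate
      e.detail = value;           // store to a local allocation: also a candidate
      if (ok) {
        return;                   // common path: `e` is never used here
      }
      throw e;                    // uncommon branch: the only user of `e`
    }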
+
+static bool IsInterestingInstruction(HInstruction* instruction) {
+ // Instructions from the entry block (for example constants) are never interesting to move.
+ if (instruction->GetBlock() == instruction->GetBlock()->GetGraph()->GetEntryBlock()) {
+ return false;
+ }
+ // We want to move moveable instructions that cannot throw, as well as
+ // heap stores and allocations.
+
+ // Volatile stores cannot be moved.
+ if (instruction->IsInstanceFieldSet()) {
+ if (instruction->AsInstanceFieldSet()->IsVolatile()) {
+ return false;
+ }
+ }
+
+ // Check allocations first, as they can throw, but it is safe to move them.
+ if (instruction->IsNewInstance() || instruction->IsNewArray()) {
+ return true;
+ }
+
+ // All other instructions that can throw cannot be moved.
+ if (instruction->CanThrow()) {
+ return false;
+ }
+
+ // We can only sink stores to local allocations. Other heap references
+ // may escape. Note that allocations can escape too, but we only move
+ // allocations if their users can move too, or are in the list of
+ // post dominated blocks.
+ if (instruction->IsInstanceFieldSet()) {
+ if (!instruction->InputAt(0)->IsNewInstance()) {
+ return false;
+ }
+ }
+
+ if (instruction->IsArraySet()) {
+ if (!instruction->InputAt(0)->IsNewArray()) {
+ return false;
+ }
+ }
+
+ // Heap accesses cannot go past instructions that have memory side effects, which
+ // we are not tracking here. Note that the load/store elimination optimization
+ // runs before this optimization, and should have removed interesting ones.
+ // In theory, we could handle loads of local allocations, but this is currently
+ // hard to test, as LSE removes them.
+ if (instruction->IsStaticFieldGet() ||
+ instruction->IsInstanceFieldGet() ||
+ instruction->IsArrayGet()) {
+ return false;
+ }
+
+ if (instruction->IsInstanceFieldSet() ||
+ instruction->IsArraySet() ||
+ instruction->CanBeMoved()) {
+ return true;
+ }
+ return false;
+}
+
+static void AddInstruction(HInstruction* instruction,
+ const ArenaBitVector& processed_instructions,
+ const ArenaBitVector& discard_blocks,
+ ArenaVector<HInstruction*>* worklist) {
+ // Add to the work list if the instruction is not in the list of blocks
+ // to discard, hasn't already been processed, and is of interest.
+ if (!discard_blocks.IsBitSet(instruction->GetBlock()->GetBlockId()) &&
+ !processed_instructions.IsBitSet(instruction->GetId()) &&
+ IsInterestingInstruction(instruction)) {
+ worklist->push_back(instruction);
+ }
+}
+
+static void AddInputs(HInstruction* instruction,
+ const ArenaBitVector& processed_instructions,
+ const ArenaBitVector& discard_blocks,
+ ArenaVector<HInstruction*>* worklist) {
+ for (HInstruction* input : instruction->GetInputs()) {
+ AddInstruction(input, processed_instructions, discard_blocks, worklist);
+ }
+}
+
+static void AddInputs(HBasicBlock* block,
+ const ArenaBitVector& processed_instructions,
+ const ArenaBitVector& discard_blocks,
+ ArenaVector<HInstruction*>* worklist) {
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
+ }
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
+ }
+}
+
+static bool ShouldFilterUse(HInstruction* instruction,
+ HInstruction* user,
+ const ArenaBitVector& post_dominated) {
+ if (instruction->IsNewInstance()) {
+ return user->IsInstanceFieldSet() &&
+ (user->InputAt(0) == instruction) &&
+ !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
+ } else if (instruction->IsNewArray()) {
+ return user->IsArraySet() &&
+ (user->InputAt(0) == instruction) &&
+ !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
+ }
+ return false;
+}
+
+
+// Find the ideal position for moving `instruction`. If `filter` is true,
+// we filter out stores into that instruction, which are processed
+// first in step (3) of the sinking algorithm.
+// This method is tailored to the sinking algorithm, unlike
+// the generic HInstruction::MoveBeforeFirstUserAndOutOfLoops.
+static HInstruction* FindIdealPosition(HInstruction* instruction,
+ const ArenaBitVector& post_dominated,
+ bool filter = false) {
+ DCHECK(!instruction->IsPhi()); // Makes no sense for Phi.
+
+ // Find the target block.
+ CommonDominator finder(/* start_block */ nullptr);
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ HInstruction* user = use.GetUser();
+ if (!(filter && ShouldFilterUse(instruction, user, post_dominated))) {
+ finder.Update(user->IsPhi()
+ ? user->GetBlock()->GetPredecessors()[use.GetIndex()]
+ : user->GetBlock());
+ }
+ }
+ for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+ DCHECK(!use.GetUser()->GetHolder()->IsPhi());
+ DCHECK(!filter || !ShouldFilterUse(instruction, use.GetUser()->GetHolder(), post_dominated));
+ finder.Update(use.GetUser()->GetHolder()->GetBlock());
+ }
+ HBasicBlock* target_block = finder.Get();
+ if (target_block == nullptr) {
+ // No user we can go next to? Likely an LSE or DCE limitation.
+ return nullptr;
+ }
+
+ // Move to the first dominator not in a loop, if we can.
+ while (target_block->IsInLoop()) {
+ if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) {
+ break;
+ }
+ target_block = target_block->GetDominator();
+ DCHECK(target_block != nullptr);
+ }
+
+ // Find insertion position. No need to filter anymore, as we have found a
+ // target block.
+ HInstruction* insert_pos = nullptr;
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ if (use.GetUser()->GetBlock() == target_block &&
+ (insert_pos == nullptr || use.GetUser()->StrictlyDominates(insert_pos))) {
+ insert_pos = use.GetUser();
+ }
+ }
+ for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+ HInstruction* user = use.GetUser()->GetHolder();
+ if (user->GetBlock() == target_block &&
+ (insert_pos == nullptr || user->StrictlyDominates(insert_pos))) {
+ insert_pos = user;
+ }
+ }
+ if (insert_pos == nullptr) {
+ // No user in `target_block`, insert before the control flow instruction.
+ insert_pos = target_block->GetLastInstruction();
+ DCHECK(insert_pos->IsControlFlow());
+ // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
+ if (insert_pos->IsIf()) {
+ HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
+ if (if_input == insert_pos->GetPrevious()) {
+ insert_pos = if_input;
+ }
+ }
+ }
+ DCHECK(!insert_pos->IsPhi());
+ return insert_pos;
+}
+
+
+void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
+ // Local allocator so that the data structures created below are discarded
+ // at the end of this optimization.
+ ArenaAllocator allocator(graph_->GetArena()->GetArenaPool());
+
+ size_t number_of_instructions = graph_->GetCurrentInstructionId();
+ ArenaVector<HInstruction*> worklist(allocator.Adapter(kArenaAllocMisc));
+ ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable */ false);
+ ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable */ false);
+ ArenaBitVector instructions_that_can_move(
+ &allocator, number_of_instructions, /* expandable */ false);
+ ArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc));
+
+ // Step (1): Visit post order to get a subset of blocks post dominated by `end_block`.
+ // TODO(ngeoffray): Getting the full set of post-dominated blocks should be done by
+ // computing the post dominator tree, but that could be too time consuming. Also,
+ // we should start the analysis from blocks dominated by an uncommon branch, but we
+ // don't profile branches yet.
+ bool found_block = false;
+ for (HBasicBlock* block : graph_->GetPostOrder()) {
+ if (block == end_block) {
+ found_block = true;
+ post_dominated.SetBit(block->GetBlockId());
+ } else if (found_block) {
+ bool is_post_dominated = true;
+ if (block->GetSuccessors().empty()) {
+ // We currently bail for loops.
+ is_post_dominated = false;
+ } else {
+ for (HBasicBlock* successor : block->GetSuccessors()) {
+ if (!post_dominated.IsBitSet(successor->GetBlockId())) {
+ is_post_dominated = false;
+ break;
+ }
+ }
+ }
+ if (is_post_dominated) {
+ post_dominated.SetBit(block->GetBlockId());
+ }
+ }
+ }
+
+ // Now that we have found a subset of post-dominated blocks, add to the worklist all inputs
+ // of instructions in these blocks that are not themselves in these blocks.
+ // Also find the common dominator of the found post dominated blocks, to help filter
+ // out un-movable uses in step (2).
+ CommonDominator finder(end_block);
+ for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) {
+ if (post_dominated.IsBitSet(i)) {
+ finder.Update(graph_->GetBlocks()[i]);
+ AddInputs(graph_->GetBlocks()[i], processed_instructions, post_dominated, &worklist);
+ }
+ }
+ HBasicBlock* common_dominator = finder.Get();
+
+ // Step (2): iterate over the worklist to find sinking candidates.
+ while (!worklist.empty()) {
+ HInstruction* instruction = worklist.back();
+ if (processed_instructions.IsBitSet(instruction->GetId())) {
+ // The instruction has already been processed, continue. This happens
+ // when the instruction is the input/user of multiple instructions.
+ worklist.pop_back();
+ continue;
+ }
+ bool all_users_in_post_dominated_blocks = true;
+ bool can_move = true;
+ // Check users of the instruction.
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ HInstruction* user = use.GetUser();
+ if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId()) &&
+ !instructions_that_can_move.IsBitSet(user->GetId())) {
+ all_users_in_post_dominated_blocks = false;
+ // If we've already processed this user, or the user cannot be moved, or
+ // does not dominate the post dominated blocks, bail.
+ // TODO(ngeoffray): The domination check is an approximation. We should
+ // instead check if the dominated blocks post dominate the user's block,
+ // but we do not have post dominance information here.
+ if (processed_instructions.IsBitSet(user->GetId()) ||
+ !IsInterestingInstruction(user) ||
+ !user->GetBlock()->Dominates(common_dominator)) {
+ can_move = false;
+ break;
+ }
+ }
+ }
+
+ // Check environment users of the instruction. Some of these users require
+ // the instruction not to move.
+ if (all_users_in_post_dominated_blocks) {
+ for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+ HEnvironment* environment = use.GetUser();
+ HInstruction* user = environment->GetHolder();
+ if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
+ if (graph_->IsDebuggable() ||
+ user->IsDeoptimize() ||
+ user->CanThrowIntoCatchBlock() ||
+ (user->IsSuspendCheck() && graph_->IsCompilingOsr())) {
+ can_move = false;
+ break;
+ }
+ }
+ }
+ }
+ if (!can_move) {
+ // Instruction cannot be moved, mark it as processed and remove it from the work
+ // list.
+ processed_instructions.SetBit(instruction->GetId());
+ worklist.pop_back();
+ } else if (all_users_in_post_dominated_blocks) {
+ // Instruction is a candidate for being sunk. Mark it as such, remove it from the
+ // work list, and add its inputs to the work list.
+ instructions_that_can_move.SetBit(instruction->GetId());
+ move_in_order.push_back(instruction);
+ processed_instructions.SetBit(instruction->GetId());
+ worklist.pop_back();
+ AddInputs(instruction, processed_instructions, post_dominated, &worklist);
+ // Drop the environment uses that are not in the list of post-dominated
+ // blocks. This is to help step (3) of this optimization, when we start
+ // moving instructions closer to their use.
+ for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+ HEnvironment* environment = use.GetUser();
+ HInstruction* user = environment->GetHolder();
+ if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
+ environment->RemoveAsUserOfInput(use.GetIndex());
+ environment->SetRawEnvAt(use.GetIndex(), nullptr);
+ }
+ }
+ } else {
+ // The information we have on the users was not enough to decide whether the
+ // instruction could be moved.
+ // Add the users to the work list, and keep the instruction in the work list
+ // to process it again once all users have been processed.
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ AddInstruction(use.GetUser(), processed_instructions, post_dominated, &worklist);
+ }
+ }
+ }
+
+ // Make sure we process instructions in dominated order. This is required for heap
+ // stores.
+ std::sort(move_in_order.begin(), move_in_order.end(), [](HInstruction* a, HInstruction* b) {
+ return b->StrictlyDominates(a);
+ });
+
+ // Step (3): Try to move sinking candidates.
+ for (HInstruction* instruction : move_in_order) {
+ HInstruction* position = nullptr;
+ if (instruction->IsArraySet() || instruction->IsInstanceFieldSet()) {
+ if (!instructions_that_can_move.IsBitSet(instruction->InputAt(0)->GetId())) {
+ // A store can trivially move, but it can safely do so only if the heap
+ // location it stores to can also move.
+ // TODO(ngeoffray): Handle allocation/store cycles by pruning these instructions
+ // from the set and all their inputs.
+ continue;
+ }
+ // Find the position of the instruction we're storing into, filtering out this
+ // store and all other stores to that instruction.
+ position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter */ true);
+
+ // The position needs to be dominated by the store, in order for the store to move there.
+ if (position == nullptr || !instruction->GetBlock()->Dominates(position->GetBlock())) {
+ continue;
+ }
+ } else {
+ // Find the ideal position within the post dominated blocks.
+ position = FindIdealPosition(instruction, post_dominated);
+ if (position == nullptr) {
+ continue;
+ }
+ }
+ // Bail if we could not find a position in the post dominated blocks (for example,
+ // if there are multiple users whose common dominator is not in the list of
+ // post dominated blocks).
+ if (!post_dominated.IsBitSet(position->GetBlock()->GetBlockId())) {
+ continue;
+ }
+ MaybeRecordStat(MethodCompilationStat::kInstructionSunk);
+ instruction->MoveBefore(position, /* ensure_safety */ false);
+ }
+}
+
+} // namespace art
diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h
new file mode 100644
index 0000000000..59cda52a8c
--- /dev/null
+++ b/compiler/optimizing/code_sinking.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
+#define ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+/**
+ * Optimization pass to move instructions into uncommon branches
+ * when it is safe to do so.
+ */
+class CodeSinking : public HOptimization {
+ public:
+ CodeSinking(HGraph* graph, OptimizingCompilerStats* stats)
+ : HOptimization(graph, kCodeSinkingPassName, stats) {}
+
+ void Run() OVERRIDE;
+
+ static constexpr const char* kCodeSinkingPassName = "code_sinking";
+
+ private:
+ // Try to move code that is only used by `end_block` and all its post-dominated /
+ // dominated blocks into these blocks.
+ void SinkCodeToUncommonBranch(HBasicBlock* end_block);
+
+ DISALLOW_COPY_AND_ASSIGN(CodeSinking);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h
index b459d24d7c..9f012cfbb2 100644
--- a/compiler/optimizing/common_dominator.h
+++ b/compiler/optimizing/common_dominator.h
@@ -36,12 +36,16 @@ class CommonDominator {
// Create a finder starting with a given block.
explicit CommonDominator(HBasicBlock* block)
: dominator_(block), chain_length_(ChainLength(block)) {
- DCHECK(block != nullptr);
}
// Update the common dominator with another block.
void Update(HBasicBlock* block) {
DCHECK(block != nullptr);
+ if (dominator_ == nullptr) {
+ dominator_ = block;
+ chain_length_ = ChainLength(block);
+ return;
+ }
HBasicBlock* block2 = dominator_;
DCHECK(block2 != nullptr);
if (block == block2) {
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 5539413aad..3dfe17647c 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -57,21 +57,27 @@ static bool IsIntAndGet(HInstruction* instruction, int64_t* value) {
return false;
}
-/** Returns b^e for b,e >= 1. Sets overflow if arithmetic wrap-around occurred. */
+/** Computes a * b for a,b > 0, setting overflow if arithmetic wrap-around occurred. */
+static int64_t SafeMul(int64_t a, int64_t b, /*out*/ bool* overflow) {
+ if (a > 0 && b > 0 && a > (std::numeric_limits<int64_t>::max() / b)) {
+ *overflow = true;
+ }
+ return a * b;
+}
+
+/** Returns b^e for b,e > 0. Sets overflow if arithmetic wrap-around occurred. */
static int64_t IntPow(int64_t b, int64_t e, /*out*/ bool* overflow) {
- DCHECK_GE(b, 1);
- DCHECK_GE(e, 1);
+ DCHECK_LT(0, b);
+ DCHECK_LT(0, e);
int64_t pow = 1;
while (e) {
if (e & 1) {
- int64_t oldpow = pow;
- pow *= b;
- if (pow < oldpow) {
- *overflow = true;
- }
+ pow = SafeMul(pow, b, overflow);
}
e >>= 1;
- b *= b;
+ if (e) {
+ b = SafeMul(b, b, overflow);
+ }
}
return pow;
}
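A minimal Java transliteration of the same square-and-multiply logic (a sketch, not ART code; the out-parameter is modeled with a one-element array):

    static long safeMul(long a, long b, boolean[] overflow) {
      // For a, b > 0, the product wraps iff a > floor(Long.MAX_VALUE / b).
      if (a > 0 && b > 0 && a > Long.MAX_VALUE / b) {
        overflow[0] = true;
      }
      return a * b;
    }

    static long intPow(long b, long e, boolean[] overflow) {
      long pow = 1;
      while (e != 0) {
        if ((e & 1) != 0) {
          pow = safeMul(pow, b, overflow);  // fold in the current bit of e
        }
        e >>= 1;
        if (e != 0) {
          b = safeMul(b, b, overflow);      // square only while bits remain
        }
      }
      return pow;
    }

The guard around the final squaring is the point of the change: the old code squared `b` unconditionally, which could flag an overflow for a value that is never used in the result.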
@@ -377,6 +383,53 @@ bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) co
return false;
}
+bool InductionVarRange::IsUnitStride(HInstruction* instruction,
+ /*out*/ HInstruction** offset) const {
+ HLoopInformation* loop = nullptr;
+ HInductionVarAnalysis::InductionInfo* info = nullptr;
+ HInductionVarAnalysis::InductionInfo* trip = nullptr;
+ if (HasInductionInfo(instruction, instruction, &loop, &info, &trip)) {
+ if (info->induction_class == HInductionVarAnalysis::kLinear &&
+ info->op_b->operation == HInductionVarAnalysis::kFetch) {
+ int64_t stride_value = 0;
+ if (IsConstant(info->op_a, kExact, &stride_value) && stride_value == 1) {
+ int64_t off_value = 0;
+ if (IsConstant(info->op_b, kExact, &off_value) && off_value == 0) {
+ *offset = nullptr;
+ } else {
+ *offset = info->op_b->fetch;
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+}
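Illustrative Java loops (hypothetical, not from the test suite) for what IsUnitStride accepts and what it reports as the offset:

    static int demo(int[] a, int base, int n) {
      int s = 0;
      for (int i = 0; i < n; i++) {            // linear induction 1 * i + 0: unit stride, offset null
        s += a[i];
      }
      for (int j = base; j < base + n; j++) {  // linear induction with fetch `base`: unit stride, offset `base`
        s += a[j - base];
      }
      for (int k = n - 1; k >= 0; k--) {       // stride is -1: not a unit stride
        s += a[k];
      }
      return s;
    }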
+
+HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop,
+ HGraph* graph,
+ HBasicBlock* block) {
+ HInductionVarAnalysis::InductionInfo *trip =
+ induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
+ if (trip != nullptr && !IsUnsafeTripCount(trip)) {
+ HInstruction* taken_test = nullptr;
+ HInstruction* trip_expr = nullptr;
+ if (IsBodyTripCount(trip)) {
+ if (!GenerateCode(trip->op_b, nullptr, graph, block, &taken_test, false, false)) {
+ return nullptr;
+ }
+ }
+ if (GenerateCode(trip->op_a, nullptr, graph, block, &trip_expr, false, false)) {
+ if (taken_test != nullptr) {
+ HInstruction* zero = graph->GetConstant(trip->type, 0);
+ trip_expr = Insert(block, new (graph->GetArena()) HSelect(taken_test, trip_expr, zero, kNoDexPc));
+ }
+ return trip_expr;
+ }
+ }
+ return nullptr;
+}
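In source-level terms, the result has this shape for a loop `for (int i = 0; i < n; i++)` whose trip count is only valid when the loop is entered (a sketch of the HSelect guard, not actual generated code):

    static int tripCount(int n) {
      boolean taken = 0 < n;    // taken test, generated from trip->op_b
      int trip = n;             // raw trip count expression, from trip->op_a
      return taken ? trip : 0;  // HSelect(taken_test, trip_expr, 0)
    }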
+
//
// Private class methods.
//
@@ -1157,12 +1210,15 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
HInstruction* opb = nullptr;
switch (info->induction_class) {
case HInductionVarAnalysis::kInvariant:
- // Invariants (note that even though is_min does not impact code generation for
- // invariants, some effort is made to keep this parameter consistent).
+ // Invariants (note that since invariants only have other invariants as
+ // sub expressions, viz. no induction, there is no need to adjust is_min).
switch (info->operation) {
case HInductionVarAnalysis::kAdd:
- case HInductionVarAnalysis::kRem: // no proper is_min for second arg
- case HInductionVarAnalysis::kXor: // no proper is_min for second arg
+ case HInductionVarAnalysis::kSub:
+ case HInductionVarAnalysis::kMul:
+ case HInductionVarAnalysis::kDiv:
+ case HInductionVarAnalysis::kRem:
+ case HInductionVarAnalysis::kXor:
case HInductionVarAnalysis::kLT:
case HInductionVarAnalysis::kLE:
case HInductionVarAnalysis::kGT:
@@ -1174,6 +1230,12 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
switch (info->operation) {
case HInductionVarAnalysis::kAdd:
operation = new (graph->GetArena()) HAdd(type, opa, opb); break;
+ case HInductionVarAnalysis::kSub:
+ operation = new (graph->GetArena()) HSub(type, opa, opb); break;
+ case HInductionVarAnalysis::kMul:
+ operation = new (graph->GetArena()) HMul(type, opa, opb, kNoDexPc); break;
+ case HInductionVarAnalysis::kDiv:
+ operation = new (graph->GetArena()) HDiv(type, opa, opb, kNoDexPc); break;
case HInductionVarAnalysis::kRem:
operation = new (graph->GetArena()) HRem(type, opa, opb, kNoDexPc); break;
case HInductionVarAnalysis::kXor:
@@ -1194,16 +1256,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
return true;
}
break;
- case HInductionVarAnalysis::kSub: // second reversed!
- if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) &&
- GenerateCode(info->op_b, trip, graph, block, &opb, in_body, !is_min)) {
- if (graph != nullptr) {
- *result = Insert(block, new (graph->GetArena()) HSub(type, opa, opb));
- }
- return true;
- }
- break;
- case HInductionVarAnalysis::kNeg: // reversed!
+ case HInductionVarAnalysis::kNeg:
if (GenerateCode(info->op_b, trip, graph, block, &opb, in_body, !is_min)) {
if (graph != nullptr) {
*result = Insert(block, new (graph->GetArena()) HNeg(type, opb));
@@ -1240,9 +1293,9 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
}
}
break;
- default:
- break;
- }
+ case HInductionVarAnalysis::kNop:
+ LOG(FATAL) << "unexpected invariant nop";
+ } // switch invariant operation
break;
case HInductionVarAnalysis::kLinear: {
// Linear induction a * i + b, for normalized 0 <= i < TC. For ranges, this should
@@ -1293,7 +1346,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
}
break;
}
- }
+ } // switch induction class
}
return false;
}
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 6c424b78b9..0858d73982 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -24,7 +24,8 @@ namespace art {
/**
* This class implements range analysis on expressions within loops. It takes the results
* of induction variable analysis in the constructor and provides a public API to obtain
- * a conservative lower and upper bound value on each instruction in the HIR.
+ * a conservative lower and upper bound value or last value on each instruction in the HIR.
+ * The public API also provides a few general-purpose utility methods related to induction.
*
* The range analysis is done with a combination of symbolic and partial integral evaluation
* of expressions. The analysis avoids complications with wrap-around arithmetic on the integral
@@ -154,6 +155,19 @@ class InductionVarRange {
*/
bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const;
+ /**
+ * Checks if instruction is a unit stride induction inside the closest enveloping loop.
+ * Returns invariant offset on success.
+ */
+ bool IsUnitStride(HInstruction* instruction, /*out*/ HInstruction** offset) const;
+
+ /**
+ * Generates the trip count expression for the given loop. Code is generated in the given
+ * block and graph. The expression is guarded by a taken test if needed. Returns the trip count
+ * expression on success or null otherwise.
+ */
+ HInstruction* GenerateTripCount(HLoopInformation* loop, HGraph* graph, HBasicBlock* block);
+
private:
/*
* Enum used in IsConstant() request.
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index d81817fb09..fcdf8eb7dc 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -48,6 +48,11 @@ class InductionVarRangeTest : public CommonCompilerTest {
EXPECT_EQ(v1.is_known, v2.is_known);
}
+ void ExpectInt(int32_t value, HInstruction* i) {
+ ASSERT_TRUE(i->IsIntConstant());
+ EXPECT_EQ(value, i->AsIntConstant()->GetValue());
+ }
+
//
// Construction methods.
//
@@ -757,10 +762,20 @@ TEST_F(InductionVarRangeTest, ConstantTripCountUp) {
// Last value (unsimplified).
HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_);
ASSERT_TRUE(last->IsAdd());
- ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
- EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue());
- ASSERT_TRUE(last->InputAt(1)->IsIntConstant());
- EXPECT_EQ(0, last->InputAt(1)->AsIntConstant()->GetValue());
+ ExpectInt(1000, last->InputAt(0));
+ ExpectInt(0, last->InputAt(1));
+
+ // Loop logic.
+ int64_t tc = 0;
+ EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+ EXPECT_EQ(1000, tc);
+ HInstruction* offset = nullptr;
+ EXPECT_TRUE(range_.IsUnitStride(phi, &offset));
+ EXPECT_TRUE(offset == nullptr);
+ HInstruction* tce = range_.GenerateTripCount(
+ loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+ ASSERT_TRUE(tce != nullptr);
+ ExpectInt(1000, tce);
}
TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
@@ -799,15 +814,27 @@ TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
// Last value (unsimplified).
HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_);
ASSERT_TRUE(last->IsSub());
- ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
- EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue());
+ ExpectInt(1000, last->InputAt(0));
ASSERT_TRUE(last->InputAt(1)->IsNeg());
last = last->InputAt(1)->InputAt(0);
ASSERT_TRUE(last->IsSub());
- ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
- EXPECT_EQ(0, last->InputAt(0)->AsIntConstant()->GetValue());
- ASSERT_TRUE(last->InputAt(1)->IsIntConstant());
- EXPECT_EQ(1000, last->InputAt(1)->AsIntConstant()->GetValue());
+ ExpectInt(0, last->InputAt(0));
+ ExpectInt(1000, last->InputAt(1));
+
+ // Loop logic.
+ int64_t tc = 0;
+ EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+ EXPECT_EQ(1000, tc);
+ HInstruction* offset = nullptr;
+ EXPECT_FALSE(range_.IsUnitStride(phi, &offset));
+ HInstruction* tce = range_.GenerateTripCount(
+ loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+ ASSERT_TRUE(tce != nullptr);
+ ASSERT_TRUE(tce->IsNeg());
+ last = tce->InputAt(0);
+ EXPECT_TRUE(last->IsSub());
+ ExpectInt(0, last->InputAt(0));
+ ExpectInt(1000, last->InputAt(1));
}
TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
@@ -851,27 +878,22 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
// Verify lower is 0+0.
ASSERT_TRUE(lower != nullptr);
ASSERT_TRUE(lower->IsAdd());
- ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
- EXPECT_EQ(0, lower->InputAt(0)->AsIntConstant()->GetValue());
- ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
- EXPECT_EQ(0, lower->InputAt(1)->AsIntConstant()->GetValue());
+ ExpectInt(0, lower->InputAt(0));
+ ExpectInt(0, lower->InputAt(1));
// Verify upper is (V-1)+0.
ASSERT_TRUE(upper != nullptr);
ASSERT_TRUE(upper->IsAdd());
ASSERT_TRUE(upper->InputAt(0)->IsSub());
EXPECT_TRUE(upper->InputAt(0)->InputAt(0)->IsParameterValue());
- ASSERT_TRUE(upper->InputAt(0)->InputAt(1)->IsIntConstant());
- EXPECT_EQ(1, upper->InputAt(0)->InputAt(1)->AsIntConstant()->GetValue());
- ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
- EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+ ExpectInt(1, upper->InputAt(0)->InputAt(1));
+ ExpectInt(0, upper->InputAt(1));
// Verify taken-test is 0<V.
HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_);
ASSERT_TRUE(taken != nullptr);
ASSERT_TRUE(taken->IsLessThan());
- ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
- EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue());
+ ExpectInt(0, taken->InputAt(0));
EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
// Replacement.
@@ -880,6 +902,21 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(1), v1);
ExpectEqual(Value(y_, 1, 0), v2);
+
+ // Loop logic.
+ int64_t tc = 0;
+ EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+ EXPECT_EQ(0, tc); // unknown
+ HInstruction* offset = nullptr;
+ EXPECT_TRUE(range_.IsUnitStride(phi, &offset));
+ EXPECT_TRUE(offset == nullptr);
+ HInstruction* tce = range_.GenerateTripCount(
+ loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+ ASSERT_TRUE(tce != nullptr);
+ EXPECT_TRUE(tce->IsSelect()); // guarded by taken-test
+ ExpectInt(0, tce->InputAt(0));
+ EXPECT_TRUE(tce->InputAt(1)->IsParameterValue());
+ EXPECT_TRUE(tce->InputAt(2)->IsLessThan());
}
TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
@@ -923,32 +960,26 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
// Verify lower is 1000-((1000-V)-1).
ASSERT_TRUE(lower != nullptr);
ASSERT_TRUE(lower->IsSub());
- ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
- EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+ ExpectInt(1000, lower->InputAt(0));
lower = lower->InputAt(1);
ASSERT_TRUE(lower->IsSub());
- ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
- EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue());
+ ExpectInt(1, lower->InputAt(1));
lower = lower->InputAt(0);
ASSERT_TRUE(lower->IsSub());
- ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
- EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+ ExpectInt(1000, lower->InputAt(0));
EXPECT_TRUE(lower->InputAt(1)->IsParameterValue());
// Verify upper is 1000-0.
ASSERT_TRUE(upper != nullptr);
ASSERT_TRUE(upper->IsSub());
- ASSERT_TRUE(upper->InputAt(0)->IsIntConstant());
- EXPECT_EQ(1000, upper->InputAt(0)->AsIntConstant()->GetValue());
- ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
- EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+ ExpectInt(1000, upper->InputAt(0));
+ ExpectInt(0, upper->InputAt(1));
// Verify taken-test is 1000>V.
HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_);
ASSERT_TRUE(taken != nullptr);
ASSERT_TRUE(taken->IsGreaterThan());
- ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
- EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue());
+ ExpectInt(1000, taken->InputAt(0));
EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
// Replacement.
@@ -957,6 +988,23 @@ TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
EXPECT_FALSE(needs_finite_test);
ExpectEqual(Value(y_, 1, 0), v1);
ExpectEqual(Value(999), v2);
+
+ // Loop logic.
+ int64_t tc = 0;
+ EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+ EXPECT_EQ(0, tc); // unknown
+ HInstruction* offset = nullptr;
+ EXPECT_FALSE(range_.IsUnitStride(phi, &offset));
+ HInstruction* tce = range_.GenerateTripCount(
+ loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+ ASSERT_TRUE(tce != nullptr);
+ EXPECT_TRUE(tce->IsSelect()); // guarded by taken-test
+ ExpectInt(0, tce->InputAt(0));
+ EXPECT_TRUE(tce->InputAt(1)->IsSub());
+ EXPECT_TRUE(tce->InputAt(2)->IsGreaterThan());
+ tce = tce->InputAt(1);
+ ExpectInt(1000, tce->InputAt(0));
+ EXPECT_TRUE(tce->InputAt(1)->IsParameterValue());
}
} // namespace art
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 8c73f1d036..3e340908bf 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -1272,12 +1272,19 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
caller_instruction_counter);
callee_graph->SetArtMethod(resolved_method);
- // When they are needed, allocate `inline_stats` on the heap instead
+ // When they are needed, allocate `inline_stats_` on the Arena instead
// of on the stack, as Clang might produce a stack frame too large
// for this function, that would not fit the requirements of the
// `-Wframe-larger-than` option.
- std::unique_ptr<OptimizingCompilerStats> inline_stats =
- (stats_ == nullptr) ? nullptr : MakeUnique<OptimizingCompilerStats>();
+ if (stats_ != nullptr) {
+ // Reuse one object for all inline attempts from this caller to keep Arena memory usage low.
+ if (inline_stats_ == nullptr) {
+ void* storage = graph_->GetArena()->Alloc<OptimizingCompilerStats>(kArenaAllocMisc);
+ inline_stats_ = new (storage) OptimizingCompilerStats;
+ } else {
+ inline_stats_->Reset();
+ }
+ }
HGraphBuilder builder(callee_graph,
&dex_compilation_unit,
&outer_compilation_unit_,
@@ -1285,7 +1292,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
*code_item,
compiler_driver_,
codegen_,
- inline_stats.get(),
+ inline_stats_,
resolved_method->GetQuickenedInfo(class_linker->GetImagePointerSize()),
dex_cache,
handles_);
@@ -1468,6 +1475,11 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId())
<< "No instructions can be added to the inner graph during inlining into the outer graph";
+ if (stats_ != nullptr) {
+ DCHECK(inline_stats_ != nullptr);
+ inline_stats_->AddTo(stats_);
+ }
+
return true;
}
@@ -1476,11 +1488,11 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph,
const DexCompilationUnit& dex_compilation_unit) {
// Note: if the outermost_graph_ is being compiled OSR, we should not run any
// optimization that could lead to a HDeoptimize. The following optimizations do not.
- HDeadCodeElimination dce(callee_graph, stats_, "dead_code_elimination$inliner");
+ HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
HConstantFolding fold(callee_graph, "constant_folding$inliner");
HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
- InstructionSimplifier simplify(callee_graph, stats_);
- IntrinsicsRecognizer intrinsics(callee_graph, stats_);
+ InstructionSimplifier simplify(callee_graph, inline_stats_);
+ IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
HOptimization* optimizations[] = {
&intrinsics,
@@ -1504,7 +1516,7 @@ size_t HInliner::RunOptimizations(HGraph* callee_graph,
dex_compilation_unit,
compiler_driver_,
handles_,
- stats_,
+ inline_stats_,
total_number_of_dex_registers_ + code_item->registers_size_,
depth_ + 1);
inliner.Run();
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 11aacab802..75d025ae41 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -51,7 +51,8 @@ class HInliner : public HOptimization {
total_number_of_dex_registers_(total_number_of_dex_registers),
depth_(depth),
number_of_inlined_instructions_(0),
- handles_(handles) {}
+ handles_(handles),
+ inline_stats_(nullptr) {}
void Run() OVERRIDE;
@@ -218,6 +219,10 @@ class HInliner : public HOptimization {
size_t number_of_inlined_instructions_;
VariableSizedHandleScope* const handles_;
+ // Used to record stats about optimizations on the inlined graph.
+ // If the inlining is successful, these stats are merged into the caller graph's stats.
+ OptimizingCompilerStats* inline_stats_;
+
DISALLOW_COPY_AND_ASSIGN(HInliner);
};
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 17d683f357..8df80adc9f 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -19,6 +19,7 @@
#include "art_method.h"
#include "class_linker.h"
#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "invoke_type.h"
#include "mirror/dex_cache-inl.h"
#include "nodes.h"
@@ -178,4 +179,112 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
return os;
}
+void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
+ CodeGenerator* codegen,
+ Location return_location,
+ Location first_argument_location) {
+ if (Runtime::Current()->IsAotCompiler()) {
+ if (codegen->GetCompilerOptions().IsBootImage() ||
+ codegen->GetCompilerOptions().GetCompilePic()) {
+ // TODO(ngeoffray): Support boot image compilation.
+ return;
+ }
+ }
+
+ IntegerValueOfInfo info = ComputeIntegerValueOfInfo();
+
+ // The most common case is that we have found all we needed (the classes are
+ // initialized and in the boot image). Bail if not.
+ if (info.integer_cache == nullptr ||
+ info.integer == nullptr ||
+ info.cache == nullptr ||
+ info.value_offset == 0 ||
+ // low and high cannot be 0, per the spec.
+ info.low == 0 ||
+ info.high == 0) {
+ LOG(INFO) << "Integer.valueOf will not be optimized";
+ return;
+ }
+
+ // The intrinsic will call the runtime if it needs to allocate a j.l.Integer.
+ LocationSummary* locations = new (invoke->GetBlock()->GetGraph()->GetArena()) LocationSummary(
+ invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
+ if (!invoke->InputAt(0)->IsConstant()) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+ locations->AddTemp(first_argument_location);
+ locations->SetOut(return_location);
+}
+
+IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo() {
+ // Note that we could cache all of the data looked up here, but there's no good
+ // location for it. We don't want to add it to WellKnownClasses, to avoid creating global
+ // JNI values. Adding it as state to the compiler singleton seems like the wrong
+ // separation of concerns.
+ // The need for this data should be pretty rare though.
+
+ // The most common case is that the classes are in the boot image and initialized,
+ // which is easy to generate code for. We bail if not.
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
+ Runtime* runtime = Runtime::Current();
+ ClassLinker* class_linker = runtime->GetClassLinker();
+ gc::Heap* heap = runtime->GetHeap();
+ IntegerValueOfInfo info;
+ info.integer_cache = class_linker->FindSystemClass(self, "Ljava/lang/Integer$IntegerCache;");
+ if (info.integer_cache == nullptr) {
+ self->ClearException();
+ return info;
+ }
+ if (!heap->ObjectIsInBootImageSpace(info.integer_cache) || !info.integer_cache->IsInitialized()) {
+ // Optimization only works if the class is initialized and in the boot image.
+ return info;
+ }
+ info.integer = class_linker->FindSystemClass(self, "Ljava/lang/Integer;");
+ if (info.integer == nullptr) {
+ self->ClearException();
+ return info;
+ }
+ if (!heap->ObjectIsInBootImageSpace(info.integer) || !info.integer->IsInitialized()) {
+ // Optimization only works if the class is initialized and in the boot image.
+ return info;
+ }
+
+ ArtField* field = info.integer_cache->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;");
+ if (field == nullptr) {
+ return info;
+ }
+ info.cache = static_cast<mirror::ObjectArray<mirror::Object>*>(
+ field->GetObject(info.integer_cache).Ptr());
+ if (info.cache == nullptr) {
+ return info;
+ }
+
+ if (!heap->ObjectIsInBootImageSpace(info.cache)) {
+ // Optimization only works if the object is in the boot image.
+ return info;
+ }
+
+ field = info.integer->FindDeclaredInstanceField("value", "I");
+ if (field == nullptr) {
+ return info;
+ }
+ info.value_offset = field->GetOffset().Int32Value();
+
+ field = info.integer_cache->FindDeclaredStaticField("low", "I");
+ if (field == nullptr) {
+ return info;
+ }
+ info.low = field->GetInt(info.integer_cache);
+
+ field = info.integer_cache->FindDeclaredStaticField("high", "I");
+ if (field == nullptr) {
+ return info;
+ }
+ info.high = field->GetInt(info.integer_cache);
+
+ DCHECK_EQ(info.cache->GetLength(), info.high - info.low + 1);
+ return info;
+}
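The language-level behavior all of this relies on: Integer.valueOf must serve values in [low, high] from Integer$IntegerCache.cache, and the specification requires at least [-128, 127]. A quick demonstration (the second result assumes a default, un-enlarged cache):

    public class ValueOfDemo {
      public static void main(String[] args) {
        Integer a = Integer.valueOf(100), b = Integer.valueOf(100);
        System.out.println(a == b);  // true: both references come from the cache
        Integer c = Integer.valueOf(1000), d = Integer.valueOf(1000);
        System.out.println(c == d);  // false with a default cache: fresh allocations
      }
    }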
+
} // namespace art
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 6425e1313f..9da5a7fa3b 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -113,6 +113,39 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
}
+ static void ComputeIntegerValueOfLocations(HInvoke* invoke,
+ CodeGenerator* codegen,
+ Location return_location,
+ Location first_argument_location);
+
+ // Temporary data structure holding data useful to the Integer.valueOf intrinsic. We only
+ // use it if the mirror::Class* are in the boot image, so it is fine to keep raw
+ // mirror::Class pointers in this structure.
+ struct IntegerValueOfInfo {
+ IntegerValueOfInfo()
+ : integer_cache(nullptr),
+ integer(nullptr),
+ cache(nullptr),
+ low(0),
+ high(0),
+ value_offset(0) {}
+
+ // The java.lang.Integer$IntegerCache class.
+ mirror::Class* integer_cache;
+ // The java.lang.Integer class.
+ mirror::Class* integer;
+ // Value of java.lang.IntegerCache#cache.
+ mirror::ObjectArray<mirror::Object>* cache;
+ // Value of java.lang.IntegerCache#low.
+ int32_t low;
+ // Value of java.lang.IntegerCache#high.
+ int32_t high;
+ // The offset of java.lang.Integer.value.
+ int32_t value_offset;
+ };
+
+ static IntegerValueOfInfo ComputeIntegerValueOfInfo();
+
protected:
IntrinsicVisitor() {}
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index c262cf983d..86000e9356 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -129,6 +129,7 @@ class ReadBarrierSystemArrayCopySlowPathARM : public SlowPathCode {
IntrinsicLocationsBuilderARM::IntrinsicLocationsBuilderARM(CodeGeneratorARM* codegen)
: arena_(codegen->GetGraph()->GetArena()),
+ codegen_(codegen),
assembler_(codegen->GetAssembler()),
features_(codegen->GetInstructionSetFeatures()) {}
@@ -2644,6 +2645,75 @@ void IntrinsicCodeGeneratorARM::VisitReferenceGetReferent(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+void IntrinsicLocationsBuilderARM::VisitIntegerValueOf(HInvoke* invoke) {
+ InvokeRuntimeCallingConvention calling_convention;
+ IntrinsicVisitor::ComputeIntegerValueOfLocations(
+ invoke,
+ codegen_,
+ Location::RegisterLocation(R0),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerValueOf(HInvoke* invoke) {
+ IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ LocationSummary* locations = invoke->GetLocations();
+ ArmAssembler* const assembler = GetAssembler();
+
+ Register out = locations->Out().AsRegister<Register>();
+ InvokeRuntimeCallingConvention calling_convention;
+ Register argument = calling_convention.GetRegisterAt(0);
+ if (invoke->InputAt(0)->IsConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (value >= info.low && value <= info.high) {
+ // Just embed the j.l.Integer in the code.
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Object* boxed = info.cache->Get(value + (-info.low));
+ DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+ __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+ } else {
+ // Allocate and initialize a new j.l.Integer.
+ // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // JIT object table.
+ uint32_t address =
+ dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ LoadImmediate(IP, value);
+ __ StoreToOffset(kStoreWord, IP, out, info.value_offset);
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ }
+ } else {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ // Check bounds of our cache.
+ __ AddConstant(out, in, -info.low);
+ __ CmpConstant(out, info.high - info.low + 1);
+ Label allocate, done;
+ __ b(&allocate, HS);
+ // If the value is within the bounds, load the j.l.Integer directly from the array.
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+ __ LoadLiteral(IP, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+ codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), IP, out);
+ __ MaybeUnpoisonHeapReference(out);
+ __ b(&done);
+ __ Bind(&allocate);
+ // Otherwise allocate and initialize a new j.l.Integer.
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ StoreToOffset(kStoreWord, in, out, info.value_offset);
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ __ Bind(&done);
+ }
+}
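For a non-constant argument, the assembly above amounts to the following Java logic (a sketch; `cache`, `low` and `high` stand for the IntegerCache values baked in at compile time, and the unsigned comparison mirrors the `HS` branch):

    static Integer valueOf(int in, Integer[] cache, int low, int high) {
      int index = in - low;                 // AddConstant(out, in, -low)
      if (Integer.compareUnsigned(index, high - low + 1) < 0) {
        return cache[index];                // in range: load from the cache array
      }
      return new Integer(in);               // out of range: allocate and initialize
    }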
+
UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h
index 7f20ea4b1f..2840863632 100644
--- a/compiler/optimizing/intrinsics_arm.h
+++ b/compiler/optimizing/intrinsics_arm.h
@@ -51,6 +51,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
private:
ArenaAllocator* arena_;
+ CodeGenerator* codegen_;
ArmAssembler* assembler_;
const ArmInstructionSetFeatures& features_;
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 86e54294ae..6c3938c1a9 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2924,6 +2924,79 @@ void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
+ InvokeRuntimeCallingConvention calling_convention;
+ IntrinsicVisitor::ComputeIntegerValueOfLocations(
+ invoke,
+ codegen_,
+ calling_convention.GetReturnLocation(Primitive::kPrimNot),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
+ IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ LocationSummary* locations = invoke->GetLocations();
+ MacroAssembler* masm = GetVIXLAssembler();
+
+ Register out = RegisterFrom(locations->Out(), Primitive::kPrimNot);
+ UseScratchRegisterScope temps(masm);
+ Register temp = temps.AcquireW();
+ InvokeRuntimeCallingConvention calling_convention;
+ Register argument = calling_convention.GetRegisterAt(0);
+ if (invoke->InputAt(0)->IsConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (value >= info.low && value <= info.high) {
+ // Just embed the j.l.Integer in the code.
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Object* boxed = info.cache->Get(value + (-info.low));
+ DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+ __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+ } else {
+ // Allocate and initialize a new j.l.Integer.
+ // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // JIT object table.
+ uint32_t address =
+ dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ Mov(temp.W(), value);
+ __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ }
+ } else {
+ Register in = RegisterFrom(locations->InAt(0), Primitive::kPrimInt);
+ // Check bounds of our cache.
+ __ Add(out.W(), in.W(), -info.low);
+ __ Cmp(out.W(), info.high - info.low + 1);
+ vixl::aarch64::Label allocate, done;
+ __ B(&allocate, hs);
+ // If the value is within the bounds, load the j.l.Integer directly from the array.
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+ __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+ MemOperand source = HeapOperand(
+ temp, out.X(), LSL, Primitive::ComponentSizeShift(Primitive::kPrimNot));
+ codegen_->Load(Primitive::kPrimNot, out, source);
+ codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
+ __ B(&done);
+ __ Bind(&allocate);
+ // Otherwise allocate and initialize a new j.l.Integer.
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ __ Bind(&done);
+ }
+}
+
UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index 28e41cb086..3c53517b28 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -38,7 +38,8 @@ class CodeGeneratorARM64;
class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor {
public:
- explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* arena) : arena_(arena) {}
+ explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* arena, CodeGeneratorARM64* codegen)
+ : arena_(arena), codegen_(codegen) {}
// Define visitor methods.
@@ -56,6 +57,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
private:
ArenaAllocator* arena_;
+ CodeGeneratorARM64* codegen_;
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM64);
};
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 70a3d38c13..aa89deae34 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -203,6 +203,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
: arena_(codegen->GetGraph()->GetArena()),
+ codegen_(codegen),
assembler_(codegen->GetAssembler()),
features_(codegen->GetInstructionSetFeatures()) {}
@@ -2988,6 +2989,77 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
__ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ IntrinsicVisitor::ComputeIntegerValueOfLocations(
+ invoke,
+ codegen_,
+ LocationFrom(r0),
+ LocationFrom(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
+ IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ LocationSummary* locations = invoke->GetLocations();
+ ArmVIXLAssembler* const assembler = GetAssembler();
+
+ vixl32::Register out = RegisterFrom(locations->Out());
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ vixl32::Register temp = temps.Acquire();
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ vixl32::Register argument = calling_convention.GetRegisterAt(0);
+ if (invoke->InputAt(0)->IsConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (value >= info.low && value <= info.high) {
+ // Just embed the j.l.Integer in the code.
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Object* boxed = info.cache->Get(value + (-info.low));
+ DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+ __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+ } else {
+ // Allocate and initialize a new j.l.Integer.
+ // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // JIT object table.
+ uint32_t address =
+ dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ Mov(temp, value);
+ assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ }
+ } else {
+ vixl32::Register in = RegisterFrom(locations->InAt(0));
+ // Check bounds of our cache.
+ __ Add(out, in, -info.low);
+ __ Cmp(out, info.high - info.low + 1);
+ vixl32::Label allocate, done;
+ __ B(hs, &allocate);
+ // If the value is within the bounds, load the j.l.Integer directly from the array.
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+ __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+ codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), temp, out);
+ assembler->MaybeUnpoisonHeapReference(out);
+ __ B(&done);
+ __ Bind(&allocate);
+ // Otherwise allocate and initialize a new j.l.Integer.
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
+ // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+ // one.
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+ __ Bind(&done);
+ }
+}
+
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h
index 6e79cb76a1..023cba1349 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.h
+++ b/compiler/optimizing/intrinsics_arm_vixl.h
@@ -47,6 +47,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
private:
ArenaAllocator* arena_;
+ CodeGenerator* codegen_;
ArmVIXLAssembler* assembler_;
const ArmInstructionSetFeatures& features_;
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 64a68403e9..ba006edfa2 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1572,6 +1572,10 @@ static void GenUnsafeGet(HInvoke* invoke,
__ Lwr(trg, TMP, 0);
__ Lwl(trg, TMP, 3);
}
+
+ if (type == Primitive::kPrimNot) {
+ __ MaybeUnpoisonHeapReference(trg);
+ }
}
}
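Under heap poisoning, the Unsafe get/put paths keep a simple invariant: every reference written to the heap is poisoned and every reference read back is unpoisoned, so mutator code only ever handles unpoisoned values. A minimal sketch; on MIPS, poisoning is 32-bit negation, so the same operation inverts itself (names are illustrative):

    #include <cstdint>

    inline uint32_t PoisonRef(uint32_t ref) { return 0u - ref; }    // Subu(dst, ZERO, src)
    inline uint32_t UnpoisonRef(uint32_t ref) { return 0u - ref; }  // Negation is an involution.

    void PutRef(uint32_t* slot, uint32_t ref) { *slot = PoisonRef(ref); }
    uint32_t GetRef(const uint32_t* slot) { return UnpoisonRef(*slot); }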
@@ -1663,6 +1667,11 @@ static void GenUnsafePut(LocationSummary* locations,
if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) {
Register value = locations->InAt(3).AsRegister<Register>();
+ if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+ __ PoisonHeapReference(AT, value);
+ value = AT;
+ }
+
if (is_R6) {
__ Sw(value, TMP, 0);
} else {
@@ -1852,13 +1861,23 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
codegen->MarkGCCard(base, value, value_can_be_null);
}
+ MipsLabel loop_head, exit_loop;
+ __ Addu(TMP, base, offset_lo);
+
+ if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+ __ PoisonHeapReference(expected);
+    // Do not poison `value` if it is the same register as
+ // `expected`, which has just been poisoned.
+ if (value != expected) {
+ __ PoisonHeapReference(value);
+ }
+ }
+
// do {
// tmp_value = [tmp_ptr] - expected;
// } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
// result = tmp_value != 0;
- MipsLabel loop_head, exit_loop;
- __ Addu(TMP, base, offset_lo);
__ Sync(0);
__ Bind(&loop_head);
if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) {
@@ -1868,8 +1887,8 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
__ LlR2(out, TMP);
}
} else {
- LOG(FATAL) << "Unsupported op size " << type;
- UNREACHABLE();
+ LOG(FATAL) << "Unsupported op size " << type;
+ UNREACHABLE();
}
__ Subu(out, out, expected); // If we didn't get the 'expected'
__ Sltiu(out, out, 1); // value, set 'out' to false, and
@@ -1894,6 +1913,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// cycle atomically then retry.
__ Bind(&exit_loop);
__ Sync(0);
+
+ if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+ __ UnpoisonHeapReference(expected);
+    // Do not unpoison `value` if it is the same register as
+ // `expected`, which has just been unpoisoned.
+ if (value != expected) {
+ __ UnpoisonHeapReference(value);
+ }
+ }
}
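The reason both comparands are poisoned before the LL/SC loop, and restored afterwards since the registers are live inputs, is that memory holds poisoned words: the raw word comparison must use poisoned values too. A sketch of the equivalent logic, assuming the `PoisonRef` helper from the sketch above:

    #include <atomic>
    #include <cstdint>

    bool CasHeapRef(std::atomic<uint32_t>* addr, uint32_t expected, uint32_t value) {
      uint32_t poisoned_expected = PoisonRef(expected);  // Poison before the loop.
      uint32_t poisoned_value = PoisonRef(value);
      // The LL/SC loop compares the poisoned word in memory against the
      // poisoned `expected` and, on match, stores the poisoned `value`.
      return addr->compare_exchange_strong(poisoned_expected, poisoned_value);
    }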
// boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
@@ -1989,20 +2017,24 @@ void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) {
__ LoadConst32(out, 1);
return;
}
-
- // Check if input is null, return false if it is.
- __ Beqz(arg, &return_false);
+ StringEqualsOptimizations optimizations(invoke);
+ if (!optimizations.GetArgumentNotNull()) {
+ // Check if input is null, return false if it is.
+ __ Beqz(arg, &return_false);
+ }
// Reference equality check, return true if same reference.
__ Beq(str, arg, &return_true);
- // Instanceof check for the argument by comparing class fields.
- // All string objects must have the same type since String cannot be subclassed.
- // Receiver must be a string object, so its class field is equal to all strings' class fields.
- // If the argument is a string object, its class field must be equal to receiver's class field.
- __ Lw(temp1, str, class_offset);
- __ Lw(temp2, arg, class_offset);
- __ Bne(temp1, temp2, &return_false);
+ if (!optimizations.GetArgumentIsString()) {
+ // Instanceof check for the argument by comparing class fields.
+ // All string objects must have the same type since String cannot be subclassed.
+ // Receiver must be a string object, so its class field is equal to all strings' class fields.
+ // If the argument is a string object, its class field must be equal to receiver's class field.
+ __ Lw(temp1, str, class_offset);
+ __ Lw(temp2, arg, class_offset);
+ __ Bne(temp1, temp2, &return_false);
+ }
// Load `count` fields of this and argument strings.
__ Lw(temp1, str, count_offset);
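The two new guards mirror facts the compiler can already prove about the argument. A sketch of the check sequence, with the flags deciding which checks may be skipped; `ObjHeader` and `klass` are illustrative stand-ins for the runtime's object layout:

    struct ObjHeader { const void* klass; };

    bool StringEqualsChecks(const ObjHeader* str, const ObjHeader* arg,
                            bool argument_not_null, bool argument_is_string) {
      if (!argument_not_null && arg == nullptr) {  // Beqz(arg, &return_false)
        return false;
      }
      if (str == arg) {                            // Beq(str, arg, &return_true)
        return true;
      }
      if (!argument_is_string && str->klass != arg->klass) {  // class-field compare
        return false;
      }
      return true;  // Fall through to the count/content comparison in the real code.
    }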
@@ -2682,6 +2714,8 @@ UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject)
+UNIMPLEMENTED_INTRINSIC(MIPS, IntegerValueOf)
+
UNREACHABLE_INTRINSICS(MIPS)
#undef __
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 3888828722..21c5074a1c 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1187,6 +1187,7 @@ static void GenUnsafeGet(HInvoke* invoke,
case Primitive::kPrimNot:
__ Lwu(trg, TMP, 0);
+ __ MaybeUnpoisonHeapReference(trg);
break;
case Primitive::kPrimLong:
@@ -1285,7 +1286,12 @@ static void GenUnsafePut(LocationSummary* locations,
switch (type) {
case Primitive::kPrimInt:
case Primitive::kPrimNot:
- __ Sw(value, TMP, 0);
+ if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+ __ PoisonHeapReference(AT, value);
+ __ Sw(AT, TMP, 0);
+ } else {
+ __ Sw(value, TMP, 0);
+ }
break;
case Primitive::kPrimLong:
@@ -1454,13 +1460,23 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
codegen->MarkGCCard(base, value, value_can_be_null);
}
+ Mips64Label loop_head, exit_loop;
+ __ Daddu(TMP, base, offset);
+
+ if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+ __ PoisonHeapReference(expected);
+    // Do not poison `value` if it is the same register as
+ // `expected`, which has just been poisoned.
+ if (value != expected) {
+ __ PoisonHeapReference(value);
+ }
+ }
+
// do {
// tmp_value = [tmp_ptr] - expected;
// } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
// result = tmp_value != 0;
- Mips64Label loop_head, exit_loop;
- __ Daddu(TMP, base, offset);
__ Sync(0);
__ Bind(&loop_head);
if (type == Primitive::kPrimLong) {
@@ -1469,6 +1485,11 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// Note: We will need a read barrier here, when read barrier
// support is added to the MIPS64 back end.
__ Ll(out, TMP);
+ if (type == Primitive::kPrimNot) {
+ // The LL instruction sign-extends the 32-bit value, but
+ // 32-bit references must be zero-extended. Zero-extend `out`.
+ __ Dext(out, out, 0, 32);
+ }
}
__ Dsubu(out, out, expected); // If we didn't get the 'expected'
__ Sltiu(out, out, 1); // value, set 'out' to false, and
@@ -1487,6 +1508,15 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// cycle atomically then retry.
__ Bind(&exit_loop);
__ Sync(0);
+
+ if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+ __ UnpoisonHeapReference(expected);
+    // Do not unpoison `value` if it is the same register as
+ // `expected`, which has just been unpoisoned.
+ if (value != expected) {
+ __ UnpoisonHeapReference(value);
+ }
+ }
}
// boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
@@ -1593,19 +1623,24 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) {
return;
}
- // Check if input is null, return false if it is.
- __ Beqzc(arg, &return_false);
+ StringEqualsOptimizations optimizations(invoke);
+ if (!optimizations.GetArgumentNotNull()) {
+ // Check if input is null, return false if it is.
+ __ Beqzc(arg, &return_false);
+ }
// Reference equality check, return true if same reference.
__ Beqc(str, arg, &return_true);
- // Instanceof check for the argument by comparing class fields.
- // All string objects must have the same type since String cannot be subclassed.
- // Receiver must be a string object, so its class field is equal to all strings' class fields.
- // If the argument is a string object, its class field must be equal to receiver's class field.
- __ Lw(temp1, str, class_offset);
- __ Lw(temp2, arg, class_offset);
- __ Bnec(temp1, temp2, &return_false);
+ if (!optimizations.GetArgumentIsString()) {
+ // Instanceof check for the argument by comparing class fields.
+ // All string objects must have the same type since String cannot be subclassed.
+ // Receiver must be a string object, so its class field is equal to all strings' class fields.
+ // If the argument is a string object, its class field must be equal to receiver's class field.
+ __ Lw(temp1, str, class_offset);
+ __ Lw(temp2, arg, class_offset);
+ __ Bnec(temp1, temp2, &return_false);
+ }
// Load `count` fields of this and argument strings.
__ Lw(temp1, str, count_offset);
@@ -2075,6 +2110,8 @@ UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject)
+UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerValueOf)
+
UNREACHABLE_INTRINSICS(MIPS64)
#undef __
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index e1b7ea53b4..a671788ff5 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -3335,6 +3335,65 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ Bind(intrinsic_slow_path->GetExitLabel());
}
+void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
+ InvokeRuntimeCallingConvention calling_convention;
+ IntrinsicVisitor::ComputeIntegerValueOfLocations(
+ invoke,
+ codegen_,
+ Location::RegisterLocation(EAX),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
+ IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ LocationSummary* locations = invoke->GetLocations();
+ X86Assembler* assembler = GetAssembler();
+
+ Register out = locations->Out().AsRegister<Register>();
+ InvokeRuntimeCallingConvention calling_convention;
+ if (invoke->InputAt(0)->IsConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (value >= info.low && value <= info.high) {
+ // Just embed the j.l.Integer in the code.
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Object* boxed = info.cache->Get(value + (-info.low));
+ DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+ __ movl(out, Immediate(address));
+ } else {
+ // Allocate and initialize a new j.l.Integer.
+ // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // JIT object table.
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ movl(Address(out, info.value_offset), Immediate(value));
+ }
+ } else {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ // Check bounds of our cache.
+ __ leal(out, Address(in, -info.low));
+ __ cmpl(out, Immediate(info.high - info.low + 1));
+ NearLabel allocate, done;
+ __ j(kAboveEqual, &allocate);
+ // If the value is within the bounds, load the j.l.Integer directly from the array.
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+ __ movl(out, Address(out, TIMES_4, data_offset + address));
+ __ MaybeUnpoisonHeapReference(out);
+ __ jmp(&done);
+ __ Bind(&allocate);
+ // Otherwise allocate and initialize a new j.l.Integer.
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ movl(Address(out, info.value_offset), in);
+ __ Bind(&done);
+ }
+}
+
UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 05d270a4e6..9a6dd985a4 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -39,7 +39,6 @@ IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX8
: arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}
-
X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
}
@@ -2995,6 +2994,65 @@ void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
+ InvokeRuntimeCallingConvention calling_convention;
+ IntrinsicVisitor::ComputeIntegerValueOfLocations(
+ invoke,
+ codegen_,
+ Location::RegisterLocation(RAX),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
+ IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ LocationSummary* locations = invoke->GetLocations();
+ X86_64Assembler* assembler = GetAssembler();
+
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ InvokeRuntimeCallingConvention calling_convention;
+ if (invoke->InputAt(0)->IsConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (value >= info.low && value <= info.high) {
+ // Just embed the j.l.Integer in the code.
+ ScopedObjectAccess soa(Thread::Current());
+ mirror::Object* boxed = info.cache->Get(value + (-info.low));
+ DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+ __ movl(out, Immediate(address));
+ } else {
+ // Allocate and initialize a new j.l.Integer.
+ // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+ // JIT object table.
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ movl(Address(out, info.value_offset), Immediate(value));
+ }
+ } else {
+ CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
+ // Check bounds of our cache.
+ __ leal(out, Address(in, -info.low));
+ __ cmpl(out, Immediate(info.high - info.low + 1));
+ NearLabel allocate, done;
+ __ j(kAboveEqual, &allocate);
+ // If the value is within the bounds, load the j.l.Integer directly from the array.
+ uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+ __ movl(out, Address(out, TIMES_4, data_offset + address));
+ __ MaybeUnpoisonHeapReference(out);
+ __ jmp(&done);
+ __ Bind(&allocate);
+ // Otherwise allocate and initialize a new j.l.Integer.
+ address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+ __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(address));
+ codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ __ movl(Address(out, info.value_offset), in);
+ __ Bind(&done);
+ }
+}
+
UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 2d3c00fb97..46ba048738 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -38,7 +38,8 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> {
position_(pos),
is_singleton_(true),
is_singleton_and_not_returned_(true),
- is_singleton_and_not_deopt_visible_(true) {
+ is_singleton_and_not_deopt_visible_(true),
+ has_index_aliasing_(false) {
CalculateEscape(reference_,
nullptr,
&is_singleton_,
@@ -68,13 +69,29 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> {
return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
}
+ bool HasIndexAliasing() {
+ return has_index_aliasing_;
+ }
+
+ void SetHasIndexAliasing(bool has_index_aliasing) {
+ // Only allow setting to true.
+ DCHECK(has_index_aliasing);
+ has_index_aliasing_ = has_index_aliasing;
+ }
+
private:
HInstruction* const reference_;
const size_t position_; // position in HeapLocationCollector's ref_info_array_.
- bool is_singleton_; // can only be referred to by a single name in the method,
- bool is_singleton_and_not_returned_; // and not returned to caller,
- bool is_singleton_and_not_deopt_visible_; // and not used as an environment local of HDeoptimize.
+ // Can only be referred to by a single name in the method.
+ bool is_singleton_;
+ // Is singleton and not returned to caller.
+ bool is_singleton_and_not_returned_;
+ // Is singleton and not used as an environment local of HDeoptimize.
+ bool is_singleton_and_not_deopt_visible_;
+  // Some heap locations based on `reference_` may alias through their array
+  // indices, e.g. arr[i] and arr[j] may be the same location.
+ bool has_index_aliasing_;
DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
};
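The new flag records the situation where two accesses to the same singleton array may collide through different non-constant indices, as in this sketch:

    int IndexAliasing(int* arr, int i, int j) {
      arr[i] = 1;
      arr[j] = 2;      // May overwrite arr[i] when i == j.
      return arr[i];   // Not necessarily 1, so the first store must be kept.
    }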
@@ -321,6 +338,12 @@ class HeapLocationCollector : public HGraphVisitor {
// Different constant indices do not alias.
return false;
}
+ ReferenceInfo* ref_info = loc1->GetReferenceInfo();
+ if (ref_info->IsSingleton()) {
+ // This is guaranteed by the CanReferencesAlias() test above.
+ DCHECK_EQ(ref_info, loc2->GetReferenceInfo());
+ ref_info->SetHasIndexAliasing(true);
+ }
}
return true;
}
@@ -497,7 +520,8 @@ class LSEVisitor : public HGraphVisitor {
removed_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
substitute_instructions_for_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
possibly_removed_stores_(graph->GetArena()->Adapter(kArenaAllocLSE)),
- singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)) {
+ singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+ singleton_new_arrays_(graph->GetArena()->Adapter(kArenaAllocLSE)) {
}
void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -534,20 +558,24 @@ class LSEVisitor : public HGraphVisitor {
}
// At this point, stores in possibly_removed_stores_ can be safely removed.
- for (size_t i = 0, e = possibly_removed_stores_.size(); i < e; i++) {
- HInstruction* store = possibly_removed_stores_[i];
+ for (HInstruction* store : possibly_removed_stores_) {
DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet());
store->GetBlock()->RemoveInstruction(store);
}
// Eliminate allocations that are not used.
- for (size_t i = 0, e = singleton_new_instances_.size(); i < e; i++) {
- HInstruction* new_instance = singleton_new_instances_[i];
+ for (HInstruction* new_instance : singleton_new_instances_) {
if (!new_instance->HasNonEnvironmentUses()) {
new_instance->RemoveEnvironmentUsers();
new_instance->GetBlock()->RemoveInstruction(new_instance);
}
}
+ for (HInstruction* new_array : singleton_new_arrays_) {
+ if (!new_array->HasNonEnvironmentUses()) {
+ new_array->RemoveEnvironmentUsers();
+ new_array->GetBlock()->RemoveInstruction(new_array);
+ }
+ }
}
private:
@@ -558,7 +586,7 @@ class LSEVisitor : public HGraphVisitor {
void KeepIfIsStore(HInstruction* heap_value) {
if (heap_value == kDefaultHeapValue ||
heap_value == kUnknownHeapValue ||
- !heap_value->IsInstanceFieldSet()) {
+ !(heap_value->IsInstanceFieldSet() || heap_value->IsArraySet())) {
return;
}
auto idx = std::find(possibly_removed_stores_.begin(),
@@ -734,13 +762,16 @@ class LSEVisitor : public HGraphVisitor {
heap_values[idx] = constant;
return;
}
- if (heap_value != kUnknownHeapValue && heap_value->IsInstanceFieldSet()) {
- HInstruction* store = heap_value;
- // This load must be from a singleton since it's from the same field
- // that a "removed" store puts the value. That store must be to a singleton's field.
- DCHECK(ref_info->IsSingleton());
- // Get the real heap value of the store.
- heap_value = store->InputAt(1);
+ if (heap_value != kUnknownHeapValue) {
+ if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) {
+ HInstruction* store = heap_value;
+        // This load must be from a singleton since it's from the same
+        // field/element into which a "removed" store put the value. That
+        // store must be to a singleton's field/element.
+ DCHECK(ref_info->IsSingleton());
+ // Get the real heap value of the store.
+ heap_value = heap_value->IsInstanceFieldSet() ? store->InputAt(1) : store->InputAt(2);
+ }
}
if (heap_value == kUnknownHeapValue) {
// Load isn't eliminated. Put the load as the value into the HeapLocation.
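For ArraySet the stored value is input 2 (after the array and the index), which is what the ternary selects. A sketch of the store-to-load forwarding this enables on singleton arrays:

    int ForwardedLoad() {
      int* a = new int[4]();  // Singleton array, elements default-initialized.
      a[1] = 42;              // Tracked as the heap value for a[1].
      int v = a[1];           // Eliminated: v takes the store's value directly.
      delete[] a;
      return v;               // 42, without re-reading memory.
    }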
@@ -796,19 +827,19 @@ class LSEVisitor : public HGraphVisitor {
if (Equal(heap_value, value)) {
// Store into the heap location with the same value.
same_value = true;
- } else if (index != nullptr) {
- // For array element, don't eliminate stores since it can be easily aliased
- // with non-constant index.
+ } else if (index != nullptr && ref_info->HasIndexAliasing()) {
+    } else if (index != nullptr && ref_info->HasIndexAliasing()) {
+      // For an array element, don't eliminate the store if the index can be
+      // aliased.
} else if (ref_info->IsSingletonAndRemovable()) {
- // Store into a field of a singleton that's not returned. The value cannot be
- // killed due to aliasing/invocation. It can be redundant since future loads can
- // directly get the value set by this instruction. The value can still be killed due to
- // merging or loop side effects. Stores whose values are killed due to merging/loop side
- // effects later will be removed from possibly_removed_stores_ when that is detected.
+ // Store into a field/element of a singleton instance/array that's not returned.
+ // The value cannot be killed due to aliasing/invocation. It can be redundant since
+ // future loads can directly get the value set by this instruction. The value can
+ // still be killed due to merging or loop side effects. Stores whose values are
+ // killed due to merging/loop side effects later will be removed from
+ // possibly_removed_stores_ when that is detected.
possibly_redundant = true;
HNewInstance* new_instance = ref_info->GetReference()->AsNewInstance();
- DCHECK(new_instance != nullptr);
- if (new_instance->IsFinalizable()) {
+ if (new_instance != nullptr && new_instance->IsFinalizable()) {
// Finalizable objects escape globally. Need to keep the store.
possibly_redundant = false;
} else {
@@ -834,7 +865,7 @@ class LSEVisitor : public HGraphVisitor {
if (!same_value) {
if (possibly_redundant) {
- DCHECK(instruction->IsInstanceFieldSet());
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsArraySet());
// Put the store as the heap value. If the value is loaded from heap
// by a load later, this store isn't really redundant.
heap_values[idx] = instruction;
@@ -995,6 +1026,27 @@ class LSEVisitor : public HGraphVisitor {
}
}
+ void VisitNewArray(HNewArray* new_array) OVERRIDE {
+ ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
+ if (ref_info == nullptr) {
+ // new_array isn't used for array accesses. No need to process it.
+ return;
+ }
+ if (ref_info->IsSingletonAndRemovable()) {
+ singleton_new_arrays_.push_back(new_array);
+ }
+ ArenaVector<HInstruction*>& heap_values =
+ heap_values_for_[new_array->GetBlock()->GetBlockId()];
+ for (size_t i = 0; i < heap_values.size(); i++) {
+ HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
+ HInstruction* ref = location->GetReferenceInfo()->GetReference();
+ if (ref == new_array && location->GetIndex() != nullptr) {
+ // Array elements are set to default heap values.
+ heap_values[i] = kDefaultHeapValue;
+ }
+ }
+ }
+
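Marking the elements of a fresh array as default heap values lets later loads fold to the type's zero value, as in this sketch:

    int DefaultElement() {
      int* a = new int[8]();  // New array: all elements start at the default 0.
      int v = a[3];           // LSE can substitute the constant 0 for this load.
      delete[] a;
      return v;
    }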
// Find an instruction's substitute if it should be removed.
// Return the same instruction if it should not be removed.
HInstruction* FindSubstitute(HInstruction* instruction) {
@@ -1023,6 +1075,7 @@ class LSEVisitor : public HGraphVisitor {
ArenaVector<HInstruction*> possibly_removed_stores_;
ArenaVector<HInstruction*> singleton_new_instances_;
+ ArenaVector<HInstruction*> singleton_new_arrays_;
DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
};
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 26c9ab83c2..318d83bf40 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -16,6 +16,7 @@
#include "loop_optimization.h"
+#include "driver/compiler_driver.h"
#include "linear_order.h"
namespace art {
@@ -57,8 +58,10 @@ static bool IsEarlyExit(HLoopInformation* loop_info) {
//
HLoopOptimization::HLoopOptimization(HGraph* graph,
+ CompilerDriver* compiler_driver,
HInductionVarAnalysis* induction_analysis)
: HOptimization(graph, kLoopOptimizationPassName),
+ compiler_driver_(compiler_driver),
induction_range_(induction_analysis),
loop_allocator_(nullptr),
top_loop_(nullptr),
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 9ddab4150c..0b798fc7a9 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -23,13 +23,17 @@
namespace art {
+class CompilerDriver;
+
/**
* Loop optimizations. Builds a loop hierarchy and applies optimizations to
* the detected nested loops, such as removal of dead induction and empty loops.
*/
class HLoopOptimization : public HOptimization {
public:
- HLoopOptimization(HGraph* graph, HInductionVarAnalysis* induction_analysis);
+ HLoopOptimization(HGraph* graph,
+ CompilerDriver* compiler_driver,
+ HInductionVarAnalysis* induction_analysis);
void Run() OVERRIDE;
@@ -76,6 +80,9 @@ class HLoopOptimization : public HOptimization {
bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block);
void RemoveDeadInstructions(const HInstructionList& list);
+ // Compiler driver (to query ISA features).
+ const CompilerDriver* compiler_driver_;
+
// Range information based on prior induction variable analysis.
InductionVarRange induction_range_;
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index 9a6b4935b2..5b9350689e 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -31,7 +31,7 @@ class LoopOptimizationTest : public CommonCompilerTest {
allocator_(&pool_),
graph_(CreateGraph(&allocator_)),
iva_(new (&allocator_) HInductionVarAnalysis(graph_)),
- loop_opt_(new (&allocator_) HLoopOptimization(graph_, iva_)) {
+ loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_)) {
BuildGraph();
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 8a9e61875a..c39aed2c6a 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1914,6 +1914,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
virtual bool IsControlFlow() const { return false; }
+ // Can the instruction throw?
+  // TODO: We should rename to CanVisiblyThrow, as some instructions (like HNewInstance)
+ // could throw OOME, but it is still OK to remove them if they are unused.
virtual bool CanThrow() const { return false; }
bool CanThrowIntoCatchBlock() const { return CanThrow() && block_->IsTryBlock(); }
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index f72bd6a5a3..607b9433ae 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -56,6 +56,7 @@
#include "builder.h"
#include "cha_guard_optimization.h"
#include "code_generator.h"
+#include "code_sinking.h"
#include "compiled_method.h"
#include "compiler.h"
#include "constant_folding.h"
@@ -518,9 +519,11 @@ static HOptimization* BuildOptimization(
} else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
return new (arena) SideEffectsAnalysis(graph);
} else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
- return new (arena) HLoopOptimization(graph, most_recent_induction);
+ return new (arena) HLoopOptimization(graph, driver, most_recent_induction);
} else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) {
return new (arena) CHAGuardOptimization(graph);
+ } else if (opt_name == CodeSinking::kCodeSinkingPassName) {
+ return new (arena) CodeSinking(graph, stats);
#ifdef ART_ENABLE_CODEGEN_arm
} else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
@@ -770,13 +773,16 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
HConstantFolding* fold2 = new (arena) HConstantFolding(
graph, "constant_folding$after_inlining");
HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce");
- SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
- GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
- LICM* licm = new (arena) LICM(graph, *side_effects, stats);
- LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
+ SideEffectsAnalysis* side_effects1 = new (arena) SideEffectsAnalysis(
+ graph, "side_effects$before_gvn");
+ SideEffectsAnalysis* side_effects2 = new (arena) SideEffectsAnalysis(
+ graph, "side_effects$before_lse");
+ GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects1);
+ LICM* licm = new (arena) LICM(graph, *side_effects1, stats);
HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
- BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
- HLoopOptimization* loop = new (arena) HLoopOptimization(graph, induction);
+ BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction);
+ HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction);
+ LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2);
HSharpening* sharpening = new (arena) HSharpening(
graph, codegen, dex_compilation_unit, driver, handles);
InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
@@ -787,6 +793,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
graph, stats, "instruction_simplifier$before_codegen");
IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
+ CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats);
HOptimization* optimizations1[] = {
intrinsics,
@@ -806,7 +813,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
fold2, // TODO: if we don't inline we can also skip fold2.
simplify2,
dce2,
- side_effects,
+ side_effects1,
gvn,
licm,
induction,
@@ -814,9 +821,11 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
loop,
fold3, // evaluates code generated by dynamic bce
simplify3,
+ side_effects2,
lse,
cha_guard,
dce3,
+ code_sinking,
// The codegen has a few assumptions that only the instruction simplifier
// can satisfy. For example, the code generator does not expect to see a
// HTypeConversion from a type to the same type.
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 203b1ec7ec..ae9a8119a7 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
#define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
+#include <atomic>
#include <iomanip>
#include <string>
#include <type_traits>
@@ -67,14 +68,18 @@ enum MethodCompilationStat {
kImplicitNullCheckGenerated,
kExplicitNullCheckGenerated,
kSimplifyIf,
+ kInstructionSunk,
kLastStat
};
class OptimizingCompilerStats {
public:
- OptimizingCompilerStats() {}
+ OptimizingCompilerStats() {
+ // The std::atomic<> default constructor leaves values uninitialized, so initialize them now.
+ Reset();
+ }
- void RecordStat(MethodCompilationStat stat, size_t count = 1) {
+ void RecordStat(MethodCompilationStat stat, uint32_t count = 1) {
compile_stats_[stat] += count;
}
@@ -93,7 +98,7 @@ class OptimizingCompilerStats {
<< " methods: " << std::fixed << std::setprecision(2)
<< compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled.";
- for (int i = 0; i < kLastStat; i++) {
+ for (size_t i = 0; i < kLastStat; i++) {
if (compile_stats_[i] != 0) {
LOG(INFO) << PrintMethodCompilationStat(static_cast<MethodCompilationStat>(i)) << ": "
<< compile_stats_[i];
@@ -102,6 +107,21 @@ class OptimizingCompilerStats {
}
}
+ void AddTo(OptimizingCompilerStats* other_stats) {
+ for (size_t i = 0; i != kLastStat; ++i) {
+ uint32_t count = compile_stats_[i];
+ if (count != 0) {
+ other_stats->RecordStat(static_cast<MethodCompilationStat>(i), count);
+ }
+ }
+ }
+
+ void Reset() {
+ for (size_t i = 0; i != kLastStat; ++i) {
+ compile_stats_[i] = 0u;
+ }
+ }
+
private:
std::string PrintMethodCompilationStat(MethodCompilationStat stat) const {
std::string name;
@@ -147,6 +167,7 @@ class OptimizingCompilerStats {
case kImplicitNullCheckGenerated: name = "ImplicitNullCheckGenerated"; break;
case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break;
case kSimplifyIf: name = "SimplifyIf"; break;
+ case kInstructionSunk: name = "InstructionSunk"; break;
case kLastStat:
LOG(FATAL) << "invalid stat "
@@ -156,7 +177,7 @@ class OptimizingCompilerStats {
return "OptStat#" + name;
}
- AtomicInteger compile_stats_[kLastStat];
+ std::atomic<uint32_t> compile_stats_[kLastStat];
DISALLOW_COPY_AND_ASSIGN(OptimizingCompilerStats);
};
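The switch from AtomicInteger to std::atomic<uint32_t> is why the constructor now calls Reset(): std::atomic's default constructor leaves the value uninitialized (until C++20 changed it to value-initialize). A minimal sketch of the pattern, with an illustrative stat count:

    #include <atomic>
    #include <cstdint>

    class Counters {
     public:
      Counters() { Reset(); }  // std::atomic<> does not zero-initialize itself.

      void Record(size_t i, uint32_t count = 1) { counts_[i] += count; }

      void Reset() {
        for (auto& c : counts_) {
          c = 0u;  // operator= is an atomic (seq_cst) store.
        }
      }

     private:
      std::atomic<uint32_t> counts_[8];  // 8 is an illustrative stat count.
    };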
diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h
index bac6088bf7..fea47e66d9 100644
--- a/compiler/optimizing/side_effects_analysis.h
+++ b/compiler/optimizing/side_effects_analysis.h
@@ -25,8 +25,8 @@ namespace art {
class SideEffectsAnalysis : public HOptimization {
public:
- explicit SideEffectsAnalysis(HGraph* graph)
- : HOptimization(graph, kSideEffectsAnalysisPassName),
+ SideEffectsAnalysis(HGraph* graph, const char* pass_name = kSideEffectsAnalysisPassName)
+ : HOptimization(graph, pass_name),
graph_(graph),
block_effects_(graph->GetBlocks().size(),
graph->GetArena()->Adapter(kArenaAllocSideEffectsAnalysis)),
@@ -41,7 +41,7 @@ class SideEffectsAnalysis : public HOptimization {
bool HasRun() const { return has_run_; }
- static constexpr const char* kSideEffectsAnalysisPassName = "SideEffects";
+ static constexpr const char* kSideEffectsAnalysisPassName = "side_effects";
private:
void UpdateLoopEffects(HLoopInformation* info, SideEffects effects);
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index 322f6c4d70..e81e767575 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -135,6 +135,16 @@ class ArmVIXLMacroAssembler FINAL : public vixl32::MacroAssembler {
  // jumping within 2KB range. For B(cond, label), the supported branch range is only 256
  // bytes, so we use the far_target hint to try to use the 16-bit T1 encoding for short-range jumps.
void B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target = true);
+
+  // Use a literal for generating a double constant if it doesn't fit the VMOV encoding.
+ void Vmov(vixl32::DRegister rd, double imm) {
+ if (vixl::VFP::IsImmFP64(imm)) {
+ MacroAssembler::Vmov(rd, imm);
+ } else {
+ MacroAssembler::Vldr(rd, imm);
+ }
+ }
+ using MacroAssembler::Vmov;
};
class ArmVIXLAssembler FINAL : public Assembler {
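The override dispatches on whether the constant fits the 8-bit VFP immediate form; anything else comes from the literal pool. A sketch of the dispatch, with `FitsVfpImm8` as a deliberately simplified stand-in for vixl::VFP::IsImmFP64:

    #include <cstdio>

    // Simplified stand-in: the real predicate accepts exactly the doubles
    // representable as +/-(16..31)/16 * 2^n for a small exponent range.
    bool FitsVfpImm8(double imm) { return imm == 0.5 || imm == 1.0 || imm == 2.0; }

    void EmitVmovDouble(double imm) {
      if (FitsVfpImm8(imm)) {
        std::puts("vmov.f64 d0, #imm        // single instruction");
      } else {
        std::puts("vldr d0, [pc, #offset]   // load from the literal pool");
      }
    }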
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 5e83e825ed..2e2231b07d 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -3475,8 +3475,8 @@ void MipsAssembler::LoadRef(ManagedRegister mdest, ManagedRegister base, MemberO
CHECK(dest.IsCoreRegister() && base.AsMips().IsCoreRegister());
LoadFromOffset(kLoadWord, dest.AsCoreRegister(),
base.AsMips().AsCoreRegister(), offs.Int32Value());
- if (kPoisonHeapReferences && unpoison_reference) {
- Subu(dest.AsCoreRegister(), ZERO, dest.AsCoreRegister());
+ if (unpoison_reference) {
+ MaybeUnpoisonHeapReference(dest.AsCoreRegister());
}
}
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 2fca185ec3..1a5a23d10b 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -501,8 +501,10 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
bool is_float = false);
private:
+ // This will be used as an argument for loads/stores
+ // when there is no need for implicit null checks.
struct NoImplicitNullChecker {
- void operator()() {}
+ void operator()() const {}
};
public:
@@ -727,6 +729,38 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
void Pop(Register rd);
void PopAndReturn(Register rd, Register rt);
+ //
+ // Heap poisoning.
+ //
+
+ // Poison a heap reference contained in `src` and store it in `dst`.
+ void PoisonHeapReference(Register dst, Register src) {
+ // dst = -src.
+ Subu(dst, ZERO, src);
+ }
+ // Poison a heap reference contained in `reg`.
+ void PoisonHeapReference(Register reg) {
+ // reg = -reg.
+ PoisonHeapReference(reg, reg);
+ }
+ // Unpoison a heap reference contained in `reg`.
+ void UnpoisonHeapReference(Register reg) {
+ // reg = -reg.
+ Subu(reg, ZERO, reg);
+ }
+ // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybePoisonHeapReference(Register reg) {
+ if (kPoisonHeapReferences) {
+ PoisonHeapReference(reg);
+ }
+ }
+ // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybeUnpoisonHeapReference(Register reg) {
+ if (kPoisonHeapReferences) {
+ UnpoisonHeapReference(reg);
+ }
+ }
+
void Bind(Label* label) OVERRIDE {
Bind(down_cast<MipsLabel*>(label));
}
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 998f2c709b..39eb5893d8 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -488,6 +488,11 @@ void Mips64Assembler::Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
EmitI(0xf, rs, rt, imm16);
}
+void Mips64Assembler::Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+ CHECK_NE(rs, ZERO);
+ EmitI(0x1d, rs, rt, imm16);
+}
+
void Mips64Assembler::Dahi(GpuRegister rs, uint16_t imm16) {
EmitI(1, rs, static_cast<GpuRegister>(6), imm16);
}
@@ -2015,80 +2020,18 @@ void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) {
Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO);
}
-void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
+void Mips64Assembler::LoadFromOffset(LoadOperandType type,
+ GpuRegister reg,
+ GpuRegister base,
int32_t offset) {
- if (!IsInt<16>(offset) ||
- (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
- !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
- LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
- Daddu(AT, AT, base);
- base = AT;
- offset &= (kMips64DoublewordSize - 1);
- }
-
- switch (type) {
- case kLoadSignedByte:
- Lb(reg, base, offset);
- break;
- case kLoadUnsignedByte:
- Lbu(reg, base, offset);
- break;
- case kLoadSignedHalfword:
- Lh(reg, base, offset);
- break;
- case kLoadUnsignedHalfword:
- Lhu(reg, base, offset);
- break;
- case kLoadWord:
- CHECK_ALIGNED(offset, kMips64WordSize);
- Lw(reg, base, offset);
- break;
- case kLoadUnsignedWord:
- CHECK_ALIGNED(offset, kMips64WordSize);
- Lwu(reg, base, offset);
- break;
- case kLoadDoubleword:
- if (!IsAligned<kMips64DoublewordSize>(offset)) {
- CHECK_ALIGNED(offset, kMips64WordSize);
- Lwu(reg, base, offset);
- Lwu(TMP2, base, offset + kMips64WordSize);
- Dinsu(reg, TMP2, 32, 32);
- } else {
- Ld(reg, base, offset);
- }
- break;
- }
+ LoadFromOffset<>(type, reg, base, offset);
}
-void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base,
+void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type,
+ FpuRegister reg,
+ GpuRegister base,
int32_t offset) {
- if (!IsInt<16>(offset) ||
- (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
- !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
- LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
- Daddu(AT, AT, base);
- base = AT;
- offset &= (kMips64DoublewordSize - 1);
- }
-
- switch (type) {
- case kLoadWord:
- CHECK_ALIGNED(offset, kMips64WordSize);
- Lwc1(reg, base, offset);
- break;
- case kLoadDoubleword:
- if (!IsAligned<kMips64DoublewordSize>(offset)) {
- CHECK_ALIGNED(offset, kMips64WordSize);
- Lwc1(reg, base, offset);
- Lw(TMP2, base, offset + kMips64WordSize);
- Mthc1(TMP2, reg);
- } else {
- Ldc1(reg, base, offset);
- }
- break;
- default:
- LOG(FATAL) << "UNREACHABLE";
- }
+ LoadFpuFromOffset<>(type, reg, base, offset);
}
void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset,
@@ -2118,72 +2061,18 @@ void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register,
}
}
-void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base,
+void Mips64Assembler::StoreToOffset(StoreOperandType type,
+ GpuRegister reg,
+ GpuRegister base,
int32_t offset) {
- if (!IsInt<16>(offset) ||
- (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
- !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
- LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
- Daddu(AT, AT, base);
- base = AT;
- offset &= (kMips64DoublewordSize - 1);
- }
-
- switch (type) {
- case kStoreByte:
- Sb(reg, base, offset);
- break;
- case kStoreHalfword:
- Sh(reg, base, offset);
- break;
- case kStoreWord:
- CHECK_ALIGNED(offset, kMips64WordSize);
- Sw(reg, base, offset);
- break;
- case kStoreDoubleword:
- if (!IsAligned<kMips64DoublewordSize>(offset)) {
- CHECK_ALIGNED(offset, kMips64WordSize);
- Sw(reg, base, offset);
- Dsrl32(TMP2, reg, 0);
- Sw(TMP2, base, offset + kMips64WordSize);
- } else {
- Sd(reg, base, offset);
- }
- break;
- default:
- LOG(FATAL) << "UNREACHABLE";
- }
+ StoreToOffset<>(type, reg, base, offset);
}
-void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base,
+void Mips64Assembler::StoreFpuToOffset(StoreOperandType type,
+ FpuRegister reg,
+ GpuRegister base,
int32_t offset) {
- if (!IsInt<16>(offset) ||
- (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
- !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
- LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
- Daddu(AT, AT, base);
- base = AT;
- offset &= (kMips64DoublewordSize - 1);
- }
-
- switch (type) {
- case kStoreWord:
- CHECK_ALIGNED(offset, kMips64WordSize);
- Swc1(reg, base, offset);
- break;
- case kStoreDoubleword:
- if (!IsAligned<kMips64DoublewordSize>(offset)) {
- CHECK_ALIGNED(offset, kMips64WordSize);
- Mfhc1(TMP2, reg);
- Swc1(reg, base, offset);
- Sw(TMP2, base, offset + kMips64WordSize);
- } else {
- Sdc1(reg, base, offset);
- }
- break;
- default:
- LOG(FATAL) << "UNREACHABLE";
- }
+ StoreFpuToOffset<>(type, reg, base, offset);
}
static dwarf::Reg DWARFReg(GpuRegister reg) {
@@ -2367,12 +2256,8 @@ void Mips64Assembler::LoadRef(ManagedRegister mdest, ManagedRegister base, Membe
CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister());
LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(),
base.AsMips64().AsGpuRegister(), offs.Int32Value());
- if (kPoisonHeapReferences && unpoison_reference) {
- // TODO: review
- // Negate the 32-bit ref
- Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
- // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64
- Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 32);
+ if (unpoison_reference) {
+ MaybeUnpoisonHeapReference(dest.AsGpuRegister());
}
}
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index a0a1db634d..8bbe862d19 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -512,6 +512,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void Ldpc(GpuRegister rs, uint32_t imm18); // MIPS64
void Lui(GpuRegister rt, uint16_t imm16);
void Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+ void Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16); // MIPS64
void Dahi(GpuRegister rs, uint16_t imm16); // MIPS64
void Dati(GpuRegister rs, uint16_t imm16); // MIPS64
void Sync(uint32_t stype);
@@ -654,6 +655,44 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value);
void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT); // MIPS64
+ //
+ // Heap poisoning.
+ //
+
+ // Poison a heap reference contained in `src` and store it in `dst`.
+ void PoisonHeapReference(GpuRegister dst, GpuRegister src) {
+ // dst = -src.
+ // Negate the 32-bit ref.
+ Dsubu(dst, ZERO, src);
+ // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64.
+ Dext(dst, dst, 0, 32);
+ }
+ // Poison a heap reference contained in `reg`.
+ void PoisonHeapReference(GpuRegister reg) {
+ // reg = -reg.
+ PoisonHeapReference(reg, reg);
+ }
+ // Unpoison a heap reference contained in `reg`.
+ void UnpoisonHeapReference(GpuRegister reg) {
+ // reg = -reg.
+ // Negate the 32-bit ref.
+ Dsubu(reg, ZERO, reg);
+ // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64.
+ Dext(reg, reg, 0, 32);
+ }
+ // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybePoisonHeapReference(GpuRegister reg) {
+ if (kPoisonHeapReferences) {
+ PoisonHeapReference(reg);
+ }
+ }
+ // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+ void MaybeUnpoisonHeapReference(GpuRegister reg) {
+ if (kPoisonHeapReferences) {
+ UnpoisonHeapReference(reg);
+ }
+ }
+
void Bind(Label* label) OVERRIDE {
Bind(down_cast<Mips64Label*>(label));
}
@@ -733,6 +772,191 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void Bc1nez(FpuRegister ft, Mips64Label* label);
void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
+
+ private:
+ // This will be used as an argument for loads/stores
+ // when there is no need for implicit null checks.
+ struct NoImplicitNullChecker {
+ void operator()() const {}
+ };
+
+ public:
+ template <typename ImplicitNullChecker = NoImplicitNullChecker>
+ void LoadFromOffset(LoadOperandType type,
+ GpuRegister reg,
+ GpuRegister base,
+ int32_t offset,
+ ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+ if (!IsInt<16>(offset) ||
+ (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+ !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+ LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
+ Daddu(AT, AT, base);
+ base = AT;
+ offset &= (kMips64DoublewordSize - 1);
+ }
+
+ switch (type) {
+ case kLoadSignedByte:
+ Lb(reg, base, offset);
+ break;
+ case kLoadUnsignedByte:
+ Lbu(reg, base, offset);
+ break;
+ case kLoadSignedHalfword:
+ Lh(reg, base, offset);
+ break;
+ case kLoadUnsignedHalfword:
+ Lhu(reg, base, offset);
+ break;
+ case kLoadWord:
+ CHECK_ALIGNED(offset, kMips64WordSize);
+ Lw(reg, base, offset);
+ break;
+ case kLoadUnsignedWord:
+ CHECK_ALIGNED(offset, kMips64WordSize);
+ Lwu(reg, base, offset);
+ break;
+ case kLoadDoubleword:
+ if (!IsAligned<kMips64DoublewordSize>(offset)) {
+ CHECK_ALIGNED(offset, kMips64WordSize);
+ Lwu(reg, base, offset);
+ null_checker();
+ Lwu(TMP2, base, offset + kMips64WordSize);
+ Dinsu(reg, TMP2, 32, 32);
+ } else {
+ Ld(reg, base, offset);
+ null_checker();
+ }
+ break;
+ }
+ if (type != kLoadDoubleword) {
+ null_checker();
+ }
+ }
+
+ template <typename ImplicitNullChecker = NoImplicitNullChecker>
+ void LoadFpuFromOffset(LoadOperandType type,
+ FpuRegister reg,
+ GpuRegister base,
+ int32_t offset,
+ ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+ if (!IsInt<16>(offset) ||
+ (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+ !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+ LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
+ Daddu(AT, AT, base);
+ base = AT;
+ offset &= (kMips64DoublewordSize - 1);
+ }
+
+ switch (type) {
+ case kLoadWord:
+ CHECK_ALIGNED(offset, kMips64WordSize);
+ Lwc1(reg, base, offset);
+ null_checker();
+ break;
+ case kLoadDoubleword:
+ if (!IsAligned<kMips64DoublewordSize>(offset)) {
+ CHECK_ALIGNED(offset, kMips64WordSize);
+ Lwc1(reg, base, offset);
+ null_checker();
+ Lw(TMP2, base, offset + kMips64WordSize);
+ Mthc1(TMP2, reg);
+ } else {
+ Ldc1(reg, base, offset);
+ null_checker();
+ }
+ break;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ }
+ }
+
+ template <typename ImplicitNullChecker = NoImplicitNullChecker>
+ void StoreToOffset(StoreOperandType type,
+ GpuRegister reg,
+ GpuRegister base,
+ int32_t offset,
+ ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+ if (!IsInt<16>(offset) ||
+ (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+ !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+ LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
+ Daddu(AT, AT, base);
+ base = AT;
+ offset &= (kMips64DoublewordSize - 1);
+ }
+
+ switch (type) {
+ case kStoreByte:
+ Sb(reg, base, offset);
+ break;
+ case kStoreHalfword:
+ Sh(reg, base, offset);
+ break;
+ case kStoreWord:
+ CHECK_ALIGNED(offset, kMips64WordSize);
+ Sw(reg, base, offset);
+ break;
+ case kStoreDoubleword:
+ if (!IsAligned<kMips64DoublewordSize>(offset)) {
+ CHECK_ALIGNED(offset, kMips64WordSize);
+ Sw(reg, base, offset);
+ null_checker();
+ Dsrl32(TMP2, reg, 0);
+ Sw(TMP2, base, offset + kMips64WordSize);
+ } else {
+ Sd(reg, base, offset);
+ null_checker();
+ }
+ break;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ }
+ if (type != kStoreDoubleword) {
+ null_checker();
+ }
+ }
+
+ template <typename ImplicitNullChecker = NoImplicitNullChecker>
+ void StoreFpuToOffset(StoreOperandType type,
+ FpuRegister reg,
+ GpuRegister base,
+ int32_t offset,
+ ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+ if (!IsInt<16>(offset) ||
+ (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
+ !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+ LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
+ Daddu(AT, AT, base);
+ base = AT;
+ offset &= (kMips64DoublewordSize - 1);
+ }
+
+ switch (type) {
+ case kStoreWord:
+ CHECK_ALIGNED(offset, kMips64WordSize);
+ Swc1(reg, base, offset);
+ null_checker();
+ break;
+ case kStoreDoubleword:
+ if (!IsAligned<kMips64DoublewordSize>(offset)) {
+ CHECK_ALIGNED(offset, kMips64WordSize);
+ Mfhc1(TMP2, reg);
+ Swc1(reg, base, offset);
+ null_checker();
+ Sw(TMP2, base, offset + kMips64WordSize);
+ } else {
+ Sdc1(reg, base, offset);
+ null_checker();
+ }
+ break;
+ default:
+ LOG(FATAL) << "UNREACHABLE";
+ }
+ }
+
void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
void LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);
void StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
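The template parameter replaces the old non-template bodies: callers that need an implicit null check pass a callable that records the fault PC right after the first memory access, while everyone else gets the no-op default at zero cost. A sketch of the pattern with illustrative names:

    #include <cstdint>
    #include <cstdio>

    struct NoImplicitNullChecker {
      void operator()() const {}  // Default: no check recorded.
    };

    template <typename ImplicitNullChecker = NoImplicitNullChecker>
    void LoadWord(int32_t offset,
                  ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
      std::printf("lw reg, %d(base)\n", offset);  // Stands in for the emitted load.
      null_checker();  // Fires once, right after the access that may fault.
    }

    // Usage: LoadWord(8, [] { std::puts("RecordPcInfo()"); });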
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 74b8f068c1..96a02c46d7 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -1269,6 +1269,24 @@ TEST_F(AssemblerMIPS64Test, Lui) {
DriverStr(RepeatRIb(&mips64::Mips64Assembler::Lui, 16, "lui ${reg}, {imm}"), "lui");
}
+TEST_F(AssemblerMIPS64Test, Daui) {
+ std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters();
+ std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters();
+ reg2_registers.erase(reg2_registers.begin()); // reg2 can't be ZERO, remove it.
+ std::vector<int64_t> imms = CreateImmediateValuesBits(/* imm_bits */ 16, /* as_uint */ true);
+ WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
+ std::ostringstream expected;
+ for (mips64::GpuRegister* reg1 : reg1_registers) {
+ for (mips64::GpuRegister* reg2 : reg2_registers) {
+ for (int64_t imm : imms) {
+ __ Daui(*reg1, *reg2, imm);
+ expected << "daui $" << *reg1 << ", $" << *reg2 << ", " << imm << "\n";
+ }
+ }
+ }
+ DriverStr(expected.str(), "daui");
+}
+
TEST_F(AssemblerMIPS64Test, Dahi) {
DriverStr(RepeatRIb(&mips64::Mips64Assembler::Dahi, 16, "dahi ${reg}, ${reg}, {imm}"), "dahi");
}
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 6eab302dab..6a57f45e42 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -958,6 +958,14 @@ void X86Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
}
+void X86Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0x5B);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
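cvtdq2ps is encoded as 0F 5B /r with no mandatory prefix, unlike cvtdq2pd's F3 0F E6 visible in the context above (the x86-64 version additionally emits an optional REX prefix). A sketch of the register-direct encoding; illustrative, not ART's emitter:

    #include <cstdint>

    // Emit cvtdq2ps xmm(dst), xmm(src) into `out`; returns bytes written.
    // Register-direct ModRM only (mod = 11b).
    int EncodeCvtdq2ps(uint8_t dst, uint8_t src, uint8_t* out) {
      out[0] = 0x0F;
      out[1] = 0x5B;
      out[2] = static_cast<uint8_t>(0xC0 | (dst << 3) | src);  // ModRM byte.
      return 3;
    }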
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 2999599fc5..e3c123ccaf 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -457,6 +457,7 @@ class X86Assembler FINAL : public Assembler {
void cvttss2si(Register dst, XmmRegister src);
void cvttsd2si(Register dst, XmmRegister src);
+ void cvtdq2ps(XmmRegister dst, XmmRegister src);
void cvtdq2pd(XmmRegister dst, XmmRegister src);
void comiss(XmmRegister a, XmmRegister b);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index a74bea207e..110d0dcd05 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -322,6 +322,14 @@ TEST_F(AssemblerX86Test, RollImm) {
DriverStr(RepeatRI(&x86::X86Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli");
}
+TEST_F(AssemblerX86Test, Cvtdq2ps) {
+ DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2ps, "cvtdq2ps %{reg2}, %{reg1}"), "cvtdq2ps");
+}
+
+TEST_F(AssemblerX86Test, Cvtdq2pd) {
+ DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd");
+}
+
TEST_F(AssemblerX86Test, ComissAddr) {
GetAssembler()->comiss(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
const char* expected = "comiss 0(%EAX), %xmm0\n";
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 458204aca9..688fdcc37d 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1153,6 +1153,15 @@ void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
}
+void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x5B);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xF3);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 0dc11d840b..480e7116eb 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -486,6 +486,7 @@ class X86_64Assembler FINAL : public Assembler {
void cvttsd2si(CpuRegister dst, XmmRegister src); // Note: this is the r32 version.
void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);
+ void cvtdq2ps(XmmRegister dst, XmmRegister src);
void cvtdq2pd(XmmRegister dst, XmmRegister src);
void comiss(XmmRegister a, XmmRegister b);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index fe9449720f..ba011c968e 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1205,6 +1205,10 @@ TEST_F(AssemblerX86_64Test, Cvtsd2ss) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtsd2ss, "cvtsd2ss %{reg2}, %{reg1}"), "cvtsd2ss");
}
+TEST_F(AssemblerX86_64Test, Cvtdq2ps) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtdq2ps, "cvtdq2ps %{reg2}, %{reg1}"), "cvtdq2ps");
+}
+
TEST_F(AssemblerX86_64Test, Cvtdq2pd) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd");
}