Diffstat (limited to 'compiler')
-rw-r--r-- compiler/Android.bp | 1
-rw-r--r-- compiler/cfi_test.h | 2
-rw-r--r-- compiler/common_compiler_test.cc | 2
-rw-r--r-- compiler/compiled_method.h | 15
-rw-r--r-- compiler/dex/dex_to_dex_decompiler_test.cc | 8
-rw-r--r-- compiler/dex/quick_compiler_callbacks.cc | 18
-rw-r--r-- compiler/dex/quick_compiler_callbacks.h | 12
-rw-r--r-- compiler/driver/compiler_driver.cc | 35
-rw-r--r-- compiler/driver/compiler_driver.h | 10
-rw-r--r-- compiler/driver/compiler_driver_test.cc | 36
-rw-r--r-- compiler/driver/compiler_options.cc | 16
-rw-r--r-- compiler/driver/compiler_options.h | 17
-rw-r--r-- compiler/image_test.cc | 2
-rw-r--r-- compiler/image_test.h | 4
-rw-r--r-- compiler/image_writer.cc | 36
-rw-r--r-- compiler/image_writer.h | 2
-rw-r--r-- compiler/jni/quick/jni_compiler.cc | 4
-rw-r--r-- compiler/linker/arm/relative_patcher_arm_base.cc | 2
-rw-r--r-- compiler/linker/arm64/relative_patcher_arm64.cc | 2
-rw-r--r-- compiler/linker/mips/relative_patcher_mips.cc | 4
-rw-r--r-- compiler/oat_writer.cc | 178
-rw-r--r-- compiler/oat_writer.h | 13
-rw-r--r-- compiler/optimizing/bounds_check_elimination.cc | 60
-rw-r--r-- compiler/optimizing/bounds_check_elimination_test.cc | 148
-rw-r--r-- compiler/optimizing/code_generator_arm64.cc | 170
-rw-r--r-- compiler/optimizing/code_generator_arm64.h | 28
-rw-r--r-- compiler/optimizing/code_generator_arm_vixl.cc | 395
-rw-r--r-- compiler/optimizing/code_generator_arm_vixl.h | 28
-rw-r--r-- compiler/optimizing/code_generator_mips.cc | 141
-rw-r--r-- compiler/optimizing/code_generator_mips.h | 10
-rw-r--r-- compiler/optimizing/code_generator_mips64.cc | 592
-rw-r--r-- compiler/optimizing/code_generator_mips64.h | 23
-rw-r--r-- compiler/optimizing/code_generator_vector_arm64.cc | 174
-rw-r--r-- compiler/optimizing/code_generator_vector_arm_vixl.cc | 28
-rw-r--r-- compiler/optimizing/code_generator_vector_mips.cc | 95
-rw-r--r-- compiler/optimizing/code_generator_vector_mips64.cc | 95
-rw-r--r-- compiler/optimizing/code_generator_vector_x86.cc | 281
-rw-r--r-- compiler/optimizing/code_generator_vector_x86_64.cc | 254
-rw-r--r-- compiler/optimizing/code_generator_x86.cc | 27
-rw-r--r-- compiler/optimizing/code_generator_x86.h | 4
-rw-r--r-- compiler/optimizing/code_generator_x86_64.cc | 26
-rw-r--r-- compiler/optimizing/code_generator_x86_64.h | 4
-rw-r--r-- compiler/optimizing/codegen_test_utils.h | 67
-rw-r--r-- compiler/optimizing/emit_swap_mips_test.cc | 4
-rw-r--r-- compiler/optimizing/graph_visualizer.cc | 14
-rw-r--r-- compiler/optimizing/induction_var_range.cc | 6
-rw-r--r-- compiler/optimizing/induction_var_range.h | 10
-rw-r--r-- compiler/optimizing/instruction_simplifier.cc | 86
-rw-r--r-- compiler/optimizing/instruction_simplifier_arm64.cc | 6
-rw-r--r-- compiler/optimizing/instruction_simplifier_arm64.h | 1
-rw-r--r-- compiler/optimizing/instruction_simplifier_shared.cc | 67
-rw-r--r-- compiler/optimizing/instruction_simplifier_shared.h | 1
-rw-r--r-- compiler/optimizing/intrinsics_arm64.cc | 86
-rw-r--r-- compiler/optimizing/intrinsics_arm_vixl.cc | 341
-rw-r--r-- compiler/optimizing/intrinsics_mips.cc | 1
-rw-r--r-- compiler/optimizing/intrinsics_mips64.cc | 1
-rw-r--r-- compiler/optimizing/load_store_analysis.h | 31
-rw-r--r-- compiler/optimizing/loop_optimization.cc | 443
-rw-r--r-- compiler/optimizing/loop_optimization.h | 74
-rw-r--r-- compiler/optimizing/loop_optimization_test.cc | 42
-rw-r--r-- compiler/optimizing/nodes.cc | 49
-rw-r--r-- compiler/optimizing/nodes.h | 26
-rw-r--r-- compiler/optimizing/nodes_vector.h | 97
-rw-r--r-- compiler/optimizing/nodes_vector_test.cc | 28
-rw-r--r-- compiler/optimizing/optimizing_cfi_test_expected.inc | 132
-rw-r--r-- compiler/optimizing/optimizing_compiler.cc | 34
-rw-r--r-- compiler/optimizing/optimizing_compiler_stats.h | 4
-rw-r--r-- compiler/optimizing/pc_relative_fixups_mips.cc | 3
-rw-r--r-- compiler/optimizing/pc_relative_fixups_x86.cc | 1
-rw-r--r-- compiler/optimizing/reference_type_propagation.cc | 23
-rw-r--r-- compiler/optimizing/reference_type_propagation.h | 10
-rw-r--r-- compiler/optimizing/reference_type_propagation_test.cc | 3
-rw-r--r-- compiler/optimizing/scheduler.cc | 30
-rw-r--r-- compiler/optimizing/scheduler_arm.cc | 1
-rw-r--r-- compiler/optimizing/scheduler_arm64.cc | 17
-rw-r--r-- compiler/optimizing/scheduler_arm64.h | 5
-rw-r--r-- compiler/optimizing/select_generator.cc | 68
-rw-r--r-- compiler/optimizing/select_generator.h | 27
-rw-r--r-- compiler/optimizing/sharpening.cc | 10
-rw-r--r-- compiler/utils/arm/assembler_arm_vixl.cc | 16
-rw-r--r-- compiler/utils/arm/assembler_arm_vixl.h | 10
-rw-r--r-- compiler/utils/arm64/assembler_arm64.cc | 18
-rw-r--r-- compiler/utils/arm64/assembler_arm64.h | 9
-rw-r--r-- compiler/utils/arm64/jni_macro_assembler_arm64.cc | 2
-rw-r--r-- compiler/utils/jni_macro_assembler.h | 9
-rw-r--r-- compiler/utils/label.h | 4
-rw-r--r-- compiler/utils/mips/assembler_mips.cc | 450
-rw-r--r-- compiler/utils/mips/assembler_mips.h | 146
-rw-r--r-- compiler/utils/mips/assembler_mips32r6_test.cc | 708
-rw-r--r-- compiler/utils/mips/assembler_mips_test.cc | 797
-rw-r--r-- compiler/utils/mips64/assembler_mips64.cc | 386
-rw-r--r-- compiler/utils/mips64/assembler_mips64.h | 114
-rw-r--r-- compiler/utils/mips64/assembler_mips64_test.cc | 934
-rw-r--r-- compiler/utils/x86/assembler_x86.cc | 36
-rw-r--r-- compiler/utils/x86/assembler_x86.h | 5
-rw-r--r-- compiler/utils/x86/assembler_x86_test.cc | 16
-rw-r--r-- compiler/utils/x86_64/assembler_x86_64.cc | 52
-rw-r--r-- compiler/utils/x86_64/assembler_x86_64.h | 6
-rw-r--r-- compiler/utils/x86_64/assembler_x86_64_test.cc | 27
-rw-r--r-- compiler/verifier_deps_test.cc | 10
100 files changed, 6399 insertions, 2207 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp
index d060dd49de..c798d9782a 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -315,6 +315,7 @@ art_cc_library {
srcs: ["common_compiler_test.cc"],
shared_libs: [
"libartd-compiler",
+ "libartd-disassembler",
"libart-runtime-gtest",
"libbase",
],
diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h
index 5347e7fef3..866a4d57a7 100644
--- a/compiler/cfi_test.h
+++ b/compiler/cfi_test.h
@@ -26,7 +26,7 @@
#include "debug/dwarf/dwarf_constants.h"
#include "debug/dwarf/dwarf_test.h"
#include "debug/dwarf/headers.h"
-#include "disassembler/disassembler.h"
+#include "disassembler.h"
#include "gtest/gtest.h"
#include "thread.h"
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index a9a718f43c..0d38620b1a 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -95,7 +95,7 @@ void CommonCompilerTest::MakeExecutable(ArtMethod* method) {
const void* method_code = CompiledMethod::CodePointer(code_ptr,
compiled_method->GetInstructionSet());
LOG(INFO) << "MakeExecutable " << method->PrettyMethod() << " code=" << method_code;
- class_linker_->SetEntryPointsToCompiledCode(method, method_code);
+ method->SetEntryPointFromQuickCompiledCode(method_code);
} else {
// No code? You must mean to go into the interpreter.
// Or the generic JNI...
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 97127f58ed..c67c523eb3 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -126,6 +126,7 @@ class LinkerPatch {
kTypeRelative, // NOTE: Actual patching is instruction_set-dependent.
kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent.
kStringRelative, // NOTE: Actual patching is instruction_set-dependent.
+ kStringInternTable, // NOTE: Actual patching is instruction_set-dependent.
kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent.
kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent.
};
@@ -196,6 +197,16 @@ class LinkerPatch {
return patch;
}
+ static LinkerPatch StringInternTablePatch(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t target_string_idx) {
+ LinkerPatch patch(literal_offset, Type::kStringInternTable, target_dex_file);
+ patch.string_idx_ = target_string_idx;
+ patch.pc_insn_offset_ = pc_insn_offset;
+ return patch;
+ }
+
static LinkerPatch StringBssEntryPatch(size_t literal_offset,
const DexFile* target_dex_file,
uint32_t pc_insn_offset,
@@ -234,6 +245,7 @@ class LinkerPatch {
case Type::kTypeRelative:
case Type::kTypeBssEntry:
case Type::kStringRelative:
+ case Type::kStringInternTable:
case Type::kStringBssEntry:
case Type::kBakerReadBarrierBranch:
return true;
@@ -264,12 +276,14 @@ class LinkerPatch {
const DexFile* TargetStringDexFile() const {
DCHECK(patch_type_ == Type::kStringRelative ||
+ patch_type_ == Type::kStringInternTable ||
patch_type_ == Type::kStringBssEntry);
return target_dex_file_;
}
dex::StringIndex TargetStringIndex() const {
DCHECK(patch_type_ == Type::kStringRelative ||
+ patch_type_ == Type::kStringInternTable ||
patch_type_ == Type::kStringBssEntry);
return dex::StringIndex(string_idx_);
}
@@ -280,6 +294,7 @@ class LinkerPatch {
patch_type_ == Type::kTypeRelative ||
patch_type_ == Type::kTypeBssEntry ||
patch_type_ == Type::kStringRelative ||
+ patch_type_ == Type::kStringInternTable ||
patch_type_ == Type::kStringBssEntry);
return pc_insn_offset_;
}
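The new kStringInternTable patch kind has the same shape as the existing kStringRelative and kStringBssEntry kinds: a PC-relative patch recording the target dex file, the string index, and the offset of the instruction that establishes the PC base. A minimal sketch of creating one (the offsets are invented for illustration; in practice they come from the assembler):

    // Sketch only: hypothetical literal/pc-insn offsets.
    LinkerPatch patch = LinkerPatch::StringInternTablePatch(
        /* literal_offset */ 8u,          // where in the code to patch
        /* target_dex_file */ &dex_file,  // dex file defining the string
        /* pc_insn_offset */ 4u,          // e.g. the ADRP on arm64
        /* target_string_idx */ string_index.index_);
    DCHECK(patch.IsPcRelative());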
diff --git a/compiler/dex/dex_to_dex_decompiler_test.cc b/compiler/dex/dex_to_dex_decompiler_test.cc
index 1ef3ba7c00..e36d416e9f 100644
--- a/compiler/dex/dex_to_dex_decompiler_test.cc
+++ b/compiler/dex/dex_to_dex_decompiler_test.cc
@@ -17,12 +17,12 @@
#include "dex_to_dex_decompiler.h"
#include "class_linker.h"
-#include "compiler/common_compiler_test.h"
-#include "compiler/compiled_method.h"
-#include "compiler/driver/compiler_driver.h"
-#include "compiler/driver/compiler_options.h"
+#include "common_compiler_test.h"
+#include "compiled_method.h"
#include "compiler_callbacks.h"
#include "dex_file.h"
+#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "handle_scope-inl.h"
#include "mirror/class_loader.h"
#include "runtime.h"
diff --git a/compiler/dex/quick_compiler_callbacks.cc b/compiler/dex/quick_compiler_callbacks.cc
index 872f7ea15d..23511e55fc 100644
--- a/compiler/dex/quick_compiler_callbacks.cc
+++ b/compiler/dex/quick_compiler_callbacks.cc
@@ -16,6 +16,7 @@
#include "quick_compiler_callbacks.h"
+#include "driver/compiler_driver.h"
#include "verification_results.h"
#include "verifier/method_verifier-inl.h"
@@ -33,4 +34,21 @@ void QuickCompilerCallbacks::ClassRejected(ClassReference ref) {
}
}
+ClassStatus QuickCompilerCallbacks::GetPreviousClassState(ClassReference ref) {
+  // If we don't have class unloading enabled in the compiler, we will never see classes that
+  // were previously verified. Return kStatusNotReady to avoid the overhead of the lookup in the
+  // compiler driver.
+ if (!does_class_unloading_) {
+ return ClassStatus::kStatusNotReady;
+ }
+ DCHECK(compiler_driver_ != nullptr);
+  // In the case of the quicken filter, this avoids verification of quickened instructions,
+  // which the verifier doesn't currently support.
+  // In the case of the verify filter, this avoids verifying twice.
+ ClassStatus status;
+ if (!compiler_driver_->GetCompiledClass(ref, &status)) {
+ return ClassStatus::kStatusNotReady;
+ }
+ return status;
+}
+
} // namespace art
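GetPreviousClassState() lets the verifier consult the status a class reached in an earlier compilation pass, so already-processed classes are not verified again. A hedged sketch of a caller (not code from this change; names assumed):

    // Sketch only: skip re-verification when a previous pass already verified the class.
    ClassStatus previous = callbacks->GetPreviousClassState(ClassReference(dex_file, class_def_idx));
    if (previous >= ClassStatus::kStatusVerified) {
      return;  // Trust the recorded status; nothing to verify.
    }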
diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h
index a3a6c0972c..45456f2a1c 100644
--- a/compiler/dex/quick_compiler_callbacks.h
+++ b/compiler/dex/quick_compiler_callbacks.h
@@ -22,6 +22,7 @@
namespace art {
+class CompilerDriver;
class VerificationResults;
class QuickCompilerCallbacks FINAL : public CompilerCallbacks {
@@ -53,8 +54,19 @@ class QuickCompilerCallbacks FINAL : public CompilerCallbacks {
verification_results_ = verification_results;
}
+ ClassStatus GetPreviousClassState(ClassReference ref) OVERRIDE;
+
+ void SetDoesClassUnloading(bool does_class_unloading, CompilerDriver* compiler_driver)
+ OVERRIDE {
+ does_class_unloading_ = does_class_unloading;
+ compiler_driver_ = compiler_driver;
+ DCHECK(!does_class_unloading || compiler_driver_ != nullptr);
+ }
+
private:
VerificationResults* verification_results_ = nullptr;
+ bool does_class_unloading_ = false;
+ CompilerDriver* compiler_driver_ = nullptr;
std::unique_ptr<verifier::VerifierDeps> verifier_deps_;
};
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index bd530ac6a6..18b54eefba 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -2102,16 +2102,29 @@ class VerifyClassVisitor : public CompilationVisitor {
ClassReference ref(manager_->GetDexFile(), class_def_index);
manager_->GetCompiler()->RecordClassStatus(ref, klass->GetStatus());
- // It is *very* problematic if there are verification errors in the boot classpath. For example,
- // we rely on things working OK without verification when the decryption dialog is brought up.
- // So abort in a debug build if we find this violated.
+ // It is *very* problematic if there are verification errors in the boot classpath.
+ // For example, we rely on things working OK without verification when the decryption dialog
+ // is brought up. So abort in a debug build if we find this violated.
if (kIsDebugBuild) {
- // TODO(narayan): Remove this special case for signature polymorphic
- // invokes once verifier support is fully implemented.
- if (manager_->GetCompiler()->GetCompilerOptions().IsBootImage() &&
- !android::base::StartsWith(descriptor, "Ljava/lang/invoke/")) {
- DCHECK(klass->IsVerified()) << "Boot classpath class " << klass->PrettyClass()
- << " failed to fully verify: state= " << klass->GetStatus();
+ if (manager_->GetCompiler()->GetCompilerOptions().IsBootImage()) {
+ if (!klass->IsVerified()) {
+ // Re-run verification to get all failure messages if it soft-failed.
+ if (!klass->IsErroneous()) {
+ gLogVerbosity.verifier = true;
+ // Note: We can't call ClassLinker::VerifyClass, as it will elide the second
+ // verification.
+ Runtime* runtime = Runtime::Current();
+ std::string v_error;
+ verifier::MethodVerifier::VerifyClass(soa.Self(),
+ klass.Get(),
+ runtime->GetCompilerCallbacks(),
+ runtime->IsAotCompiler(),
+ verifier::HardFailLogMode::kLogInternalFatal,
+ &v_error);
+ }
+ LOG(FATAL) << "Boot classpath class " << klass->PrettyClass()
+ << " failed to fully verify: state= " << klass->GetStatus();
+ }
}
if (klass->IsVerified()) {
DCHECK_EQ(failure_kind, verifier::FailureKind::kNoFailure);
@@ -2879,9 +2892,9 @@ void CompilerDriver::AddCompiledMethod(const MethodReference& method_ref,
bool CompilerDriver::GetCompiledClass(ClassReference ref, mirror::Class::Status* status) const {
DCHECK(status != nullptr);
// The table doesn't know if something wasn't inserted. For this case it will return
- // kStatusNotReady. To handle this, just assume anything not verified is not compiled.
+ // kStatusNotReady. To handle this, just assume anything we didn't try to verify is not compiled.
if (!compiled_classes_.Get(DexFileReference(ref.first, ref.second), status) ||
- *status < mirror::Class::kStatusVerified) {
+ *status < mirror::Class::kStatusRetryVerificationAtRuntime) {
return false;
}
return true;
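With the lowered threshold, any recorded status at or above kStatusRetryVerificationAtRuntime now counts as compiled-class data; for example:

    // Illustration of GetCompiledClass() results under the new threshold:
    //   nothing recorded / kStatusNotReady          -> false
    //   kStatusRetryVerificationAtRuntime recorded  -> true (soft failure, re-verify at runtime)
    //   kStatusVerified recorded                    -> true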
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index d9886a2fba..d08d9d7940 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -22,6 +22,8 @@
#include <unordered_set>
#include <vector>
+#include "android-base/strings.h"
+
#include "arch/instruction_set.h"
#include "base/array_ref.h"
#include "base/bit_utils.h"
@@ -377,6 +379,14 @@ class CompilerDriver {
return profile_compilation_info_;
}
+ // Is `boot_image_filename` the name of a core image (small boot
+ // image used for ART testing only)?
+ static bool IsCoreImageFilename(const std::string& boot_image_filename) {
+ // TODO: This is under-approximating...
+ return android::base::EndsWith(boot_image_filename, "core.art")
+ || android::base::EndsWith(boot_image_filename, "core-optimizing.art");
+ }
+
private:
void PreCompile(jobject class_loader,
const std::vector<const DexFile*>& dex_files,
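As the TODO notes, IsCoreImageFilename() matches only two exact suffixes; illustrative inputs (paths invented):

    CompilerDriver::IsCoreImageFilename("out/host/core.art");              // true
    CompilerDriver::IsCoreImageFilename("out/host/core-optimizing.art");   // true
    CompilerDriver::IsCoreImageFilename("system/framework/boot.art");      // false
    CompilerDriver::IsCoreImageFilename("out/host/core-interpreter.art");  // false (the under-approximation)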
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index fee6afb91f..278358b250 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -23,6 +23,7 @@
#include "art_method-inl.h"
#include "class_linker-inl.h"
#include "common_compiler_test.h"
+#include "compiler_callbacks.h"
#include "dex_file.h"
#include "dex_file_types.h"
#include "gc/heap.h"
@@ -366,6 +367,41 @@ TEST_F(CompilerDriverVerifyTest, VerifyCompilation) {
CheckVerifiedClass(class_loader, "LSecond;");
}
+// Test that a class of status kStatusRetryVerificationAtRuntime is indeed recorded that way in the
+// driver.
+TEST_F(CompilerDriverVerifyTest, RetryVerificationStatusCheckVerified) {
+ Thread* const self = Thread::Current();
+ jobject class_loader;
+ std::vector<const DexFile*> dex_files;
+ const DexFile* dex_file = nullptr;
+ {
+ ScopedObjectAccess soa(self);
+ class_loader = LoadDex("ProfileTestMultiDex");
+ ASSERT_NE(class_loader, nullptr);
+ dex_files = GetDexFiles(class_loader);
+ ASSERT_GT(dex_files.size(), 0u);
+ dex_file = dex_files.front();
+ }
+ compiler_driver_->SetDexFilesForOatFile(dex_files);
+ callbacks_->SetDoesClassUnloading(true, compiler_driver_.get());
+ ClassReference ref(dex_file, 0u);
+ // Test that the status is read from the compiler driver as expected.
+ for (size_t i = mirror::Class::kStatusRetryVerificationAtRuntime;
+ i < mirror::Class::kStatusMax;
+ ++i) {
+ const mirror::Class::Status expected_status = static_cast<mirror::Class::Status>(i);
+    // Skip unsupported statuses that are never supposed to be recorded.
+ if (expected_status == mirror::Class::kStatusVerifyingAtRuntime ||
+ expected_status == mirror::Class::kStatusInitializing) {
+ continue;
+ }
+ compiler_driver_->RecordClassStatus(ref, expected_status);
+ mirror::Class::Status status = {};
+ ASSERT_TRUE(compiler_driver_->GetCompiledClass(ref, &status));
+ EXPECT_EQ(status, expected_status);
+ }
+}
+
// TODO: need check-cast test (when stub complete & we can throw/catch
} // namespace art
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 3cacc2cad7..538845de19 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -18,6 +18,8 @@
#include <fstream>
+#include "runtime.h"
+
namespace art {
CompilerOptions::CompilerOptions()
@@ -30,6 +32,7 @@ CompilerOptions::CompilerOptions()
inline_max_code_units_(kUnsetInlineMaxCodeUnits),
no_inline_from_(nullptr),
boot_image_(false),
+ core_image_(false),
app_image_(false),
top_k_profile_threshold_(kDefaultTopKProfileThreshold),
debuggable_(false),
@@ -55,6 +58,19 @@ CompilerOptions::~CompilerOptions() {
// because we don't want to include the PassManagerOptions definition from the header file.
}
+bool CompilerOptions::EmitRunTimeChecksInDebugMode() const {
+ // Run-time checks (e.g. Marking Register checks) are only emitted
+ // in debug mode, and
+ // - when running on device; or
+ // - when running on host, but only
+ // - when compiling the core image (which is used only for testing); or
+ // - when JIT compiling (only relevant for non-native methods).
+  // This is to prevent these checks from being emitted into the pre-opted
+  // boot image or into apps, as these are compiled with dex2oatd.
+ return kIsDebugBuild &&
+ (kIsTargetBuild || IsCoreImage() || Runtime::Current()->UseJitCompilation());
+}
+
void CompilerOptions::ParseHugeMethodMax(const StringPiece& option, UsageFn Usage) {
ParseUintOption(option, "--huge-method-max", &huge_method_threshold_, Usage);
}
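The predicate above reduces to a small decision table over two compile-time constants and two run-time queries:

    // EmitRunTimeChecksInDebugMode() ==
    //     kIsDebugBuild && (kIsTargetBuild || IsCoreImage() || UseJitCompilation())
    //
    //   debug build  target build  core image  JIT  -> emit checks?
    //        no          any          any      any     no
    //        yes         yes          any      any     yes
    //        yes         no           yes      any     yes
    //        yes         no           no       yes     yes
    //        yes         no           no       no      no   (host AOT with dex2oatd)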
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index b99263db0e..a9372c4844 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -161,6 +161,9 @@ class CompilerOptions FINAL {
return generate_mini_debug_info_;
}
+ // Should run-time checks be emitted in debug mode?
+ bool EmitRunTimeChecksInDebugMode() const;
+
bool GetGenerateBuildId() const {
return generate_build_id_;
}
@@ -177,14 +180,27 @@ class CompilerOptions FINAL {
return implicit_suspend_checks_;
}
+ // Are we compiling a boot image?
bool IsBootImage() const {
return boot_image_;
}
+ // Are we compiling a core image (small boot image only used for ART testing)?
+ bool IsCoreImage() const {
+ // Ensure that `core_image_` => `boot_image_`.
+ DCHECK(!core_image_ || boot_image_);
+ return core_image_;
+ }
+
+ // Are we compiling an app image?
bool IsAppImage() const {
return app_image_;
}
+ void DisableAppImage() {
+ app_image_ = false;
+ }
+
// Should the code be compiled as position independent?
bool GetCompilePic() const {
return compile_pic_;
@@ -266,6 +282,7 @@ class CompilerOptions FINAL {
const std::vector<const DexFile*>* no_inline_from_;
bool boot_image_;
+ bool core_image_;
bool app_image_;
// When using a profile file only the top K% of the profiled samples will be compiled.
double top_k_profile_threshold_;
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 252fdd67e1..7b623dd979 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -46,7 +46,7 @@ TEST_F(ImageTest, TestImageLayout) {
// Make sure that the new stuff in the clinit in ImageLayoutB is in the last image and not in the
// first two images.
ASSERT_EQ(image_sizes.size(), image_sizes_extra.size());
- // Sizes of the images should be the same. These sizes are for the whole image unrounded.
+ // Sizes of the object sections should be the same for all but the last image.
for (size_t i = 0; i < image_sizes.size() - 1; ++i) {
EXPECT_EQ(image_sizes[i], image_sizes_extra[i]);
}
diff --git a/compiler/image_test.h b/compiler/image_test.h
index daa4b11967..f1adeddb69 100644
--- a/compiler/image_test.h
+++ b/compiler/image_test.h
@@ -133,7 +133,7 @@ inline std::vector<size_t> CompilationHelper::GetImageObjectSectionSizes() {
ImageHeader image_header;
CHECK_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
CHECK(image_header.IsValid());
- ret.push_back(image_header.GetImageSize());
+ ret.push_back(image_header.GetObjectsSection().Size());
}
return ret;
}
@@ -398,7 +398,7 @@ inline void ImageTest::TestWriteRead(ImageHeader::StorageMode storage_mode) {
ImageHeader image_header;
ASSERT_EQ(file->ReadFully(&image_header, sizeof(image_header)), true);
ASSERT_TRUE(image_header.IsValid());
- const auto& bitmap_section = image_header.GetImageSection(ImageHeader::kSectionImageBitmap);
+ const auto& bitmap_section = image_header.GetImageBitmapSection();
ASSERT_GE(bitmap_section.Offset(), sizeof(image_header));
ASSERT_NE(0U, bitmap_section.Size());
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 9e4971ce75..fa9f64c9a6 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -298,8 +298,7 @@ bool ImageWriter::Write(int image_fd,
// Write out the image bitmap at the page aligned start of the image end, also uncompressed for
// convenience.
- const ImageSection& bitmap_section = image_header->GetImageSection(
- ImageHeader::kSectionImageBitmap);
+ const ImageSection& bitmap_section = image_header->GetImageBitmapSection();
// Align up since data size may be unaligned if the image is compressed.
size_t bitmap_position_in_file = RoundUp(sizeof(ImageHeader) + data_size, kPageSize);
if (!is_compressed) {
@@ -690,7 +689,7 @@ bool ImageWriter::AllocMemory() {
for (ImageInfo& image_info : image_infos_) {
ImageSection unused_sections[ImageHeader::kSectionCount];
const size_t length = RoundUp(
- image_info.CreateImageSections(unused_sections), kPageSize);
+ image_info.CreateImageSections(unused_sections, compile_app_image_), kPageSize);
std::string error_msg;
image_info.image_.reset(MemMap::MapAnonymous("image writer image",
@@ -1686,6 +1685,10 @@ void ImageWriter::CalculateNewObjectOffsets() {
runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs);
image_methods_[ImageHeader::kSaveEverythingMethod] =
runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverything);
+ image_methods_[ImageHeader::kSaveEverythingMethodForClinit] =
+ runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverythingForClinit);
+ image_methods_[ImageHeader::kSaveEverythingMethodForSuspendCheck] =
+ runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverythingForSuspendCheck);
// Visit image methods first to have the main runtime methods in the first image.
for (auto* m : image_methods_) {
CHECK(m != nullptr);
@@ -1831,7 +1834,8 @@ void ImageWriter::CalculateNewObjectOffsets() {
image_info.image_begin_ = global_image_begin_ + image_offset;
image_info.image_offset_ = image_offset;
ImageSection unused_sections[ImageHeader::kSectionCount];
- image_info.image_size_ = RoundUp(image_info.CreateImageSections(unused_sections), kPageSize);
+ image_info.image_size_ =
+ RoundUp(image_info.CreateImageSections(unused_sections, compile_app_image_), kPageSize);
// There should be no gaps until the next image.
image_offset += image_info.image_size_;
}
@@ -1862,7 +1866,8 @@ void ImageWriter::CalculateNewObjectOffsets() {
}
}
-size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) const {
+size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections,
+ bool app_image) const {
DCHECK(out_sections != nullptr);
// Do not round up any sections here that are represented by the bins since it will break
@@ -1901,8 +1906,13 @@ size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) c
ImageSection* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays];
*dex_cache_arrays_section = ImageSection(bin_slot_offsets_[kBinDexCacheArray],
bin_slot_sizes_[kBinDexCacheArray]);
- // Round up to the alignment the string table expects. See HashSet::WriteToMemory.
- size_t cur_pos = RoundUp(dex_cache_arrays_section->End(), sizeof(uint64_t));
+ // For boot image, round up to the page boundary to separate the interned strings and
+ // class table from the modifiable data. We shall mprotect() these pages read-only when
+ // we load the boot image. This is more than sufficient for the string table alignment,
+ // namely sizeof(uint64_t). See HashSet::WriteToMemory.
+ static_assert(IsAligned<sizeof(uint64_t)>(kPageSize), "String table alignment check.");
+ size_t cur_pos =
+ RoundUp(dex_cache_arrays_section->End(), app_image ? sizeof(uint64_t) : kPageSize);
// Calculate the size of the interned strings.
ImageSection* interned_strings_section = &out_sections[ImageHeader::kSectionInternedStrings];
*interned_strings_section = ImageSection(cur_pos, intern_table_bytes_);
@@ -1925,7 +1935,7 @@ void ImageWriter::CreateHeader(size_t oat_index) {
// Create the image sections.
ImageSection sections[ImageHeader::kSectionCount];
- const size_t image_end = image_info.CreateImageSections(sections);
+ const size_t image_end = image_info.CreateImageSections(sections, compile_app_image_);
// Finally bitmap section.
const size_t bitmap_bytes = image_info.image_bitmap_->Size();
@@ -2110,8 +2120,7 @@ void ImageWriter::CopyAndFixupNativeData(size_t oat_index) {
// Write the intern table into the image.
if (image_info.intern_table_bytes_ > 0) {
- const ImageSection& intern_table_section = image_header->GetImageSection(
- ImageHeader::kSectionInternedStrings);
+ const ImageSection& intern_table_section = image_header->GetInternedStringsSection();
InternTable* const intern_table = image_info.intern_table_.get();
uint8_t* const intern_table_memory_ptr =
image_info.image_->Begin() + intern_table_section.Offset();
@@ -2130,8 +2139,7 @@ void ImageWriter::CopyAndFixupNativeData(size_t oat_index) {
// Write the class table(s) into the image. class_table_bytes_ may be 0 if there are multiple
// class loaders. Writing multiple class tables into the image is currently unsupported.
if (image_info.class_table_bytes_ > 0u) {
- const ImageSection& class_table_section = image_header->GetImageSection(
- ImageHeader::kSectionClassTable);
+ const ImageSection& class_table_section = image_header->GetClassTableSection();
uint8_t* const class_table_memory_ptr =
image_info.image_->Begin() + class_table_section.Offset();
ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
@@ -2589,10 +2597,6 @@ void ImageWriter::CopyAndFixupMethod(ArtMethod* orig,
CopyReference(copy->GetDeclaringClassAddressWithoutBarrier(), orig->GetDeclaringClassUnchecked());
- mirror::MethodDexCacheType* orig_resolved_methods =
- orig->GetDexCacheResolvedMethods(target_ptr_size_);
- copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods), target_ptr_size_);
-
// OatWriter replaces the code_ with an offset value. Here we re-adjust to a pointer relative to
// oat_begin_
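For a boot image, CreateImageSections() now rounds the start of the interned strings section up to a page boundary so the loader can mprotect() the interned-string and class tables read-only; an app image keeps the old sizeof(uint64_t) alignment. A worked example of the rounding (offset invented; kPageSize is typically 4096):

    // Dex cache arrays end at offset 0x1234.
    // App image:  RoundUp(0x1234, sizeof(uint64_t)) = 0x1238
    // Boot image: RoundUp(0x1234, kPageSize)        = 0x2000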
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 866e2042f7..2fc394e862 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -258,7 +258,7 @@ class ImageWriter FINAL {
// Create the image sections into the out sections variable, returns the size of the image
// excluding the bitmap.
- size_t CreateImageSections(ImageSection* out_sections) const;
+ size_t CreateImageSections(ImageSection* out_sections, bool app_image) const;
std::unique_ptr<MemMap> image_; // Memory mapped for generating the image.
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index b65b93f05f..e7e4647866 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -219,7 +219,9 @@ static CompiledMethod* ArtJniCompileMethodInternal(CompilerDriver* driver,
// Assembler that holds generated instructions
std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm =
GetMacroAssembler<kPointerSize>(&arena, instruction_set, instruction_set_features);
- jni_asm->cfi().SetEnabled(driver->GetCompilerOptions().GenerateAnyDebugInfo());
+ const CompilerOptions& compiler_options = driver->GetCompilerOptions();
+ jni_asm->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo());
+ jni_asm->SetEmitRunTimeChecksInDebugMode(compiler_options.EmitRunTimeChecksInDebugMode());
// Offsets into data structures
// TODO: if cross compiling these offsets are for the host not the target
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index 18ff1c9bb6..4ca5afe177 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -28,7 +28,7 @@ namespace linker {
class ArmBaseRelativePatcher::ThunkData {
public:
ThunkData(std::vector<uint8_t> code, uint32_t max_next_offset)
- : code_(code),
+ : code_(std::move(code)),
offsets_(),
max_next_offset_(max_next_offset),
pending_offset_(0u) {
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index db829f3233..0ebabc15aa 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -63,6 +63,7 @@ inline bool IsAdrpPatch(const LinkerPatch& patch) {
case LinkerPatch::Type::kTypeRelative:
case LinkerPatch::Type::kTypeBssEntry:
case LinkerPatch::Type::kStringRelative:
+ case LinkerPatch::Type::kStringInternTable:
case LinkerPatch::Type::kStringBssEntry:
return patch.LiteralOffset() == patch.PcInsnOffset();
}
@@ -266,6 +267,7 @@ void Arm64RelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
// LDR/STR 32-bit or 64-bit with imm12 == 0 (unset).
DCHECK(patch.GetType() == LinkerPatch::Type::kMethodBssEntry ||
patch.GetType() == LinkerPatch::Type::kTypeBssEntry ||
+ patch.GetType() == LinkerPatch::Type::kStringInternTable ||
patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType();
DCHECK_EQ(insn & 0xbfbffc00, 0xb9000000) << std::hex << insn;
}
diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc
index 3bec30f1e8..6c974c308f 100644
--- a/compiler/linker/mips/relative_patcher_mips.cc
+++ b/compiler/linker/mips/relative_patcher_mips.cc
@@ -61,10 +61,6 @@ void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code,
// lui reg, offset_high
DCHECK_EQ(((*code)[literal_offset + 2] & 0xE0), 0x00);
DCHECK_EQ((*code)[literal_offset + 3], 0x3C);
- // addu reg, reg, reg2
- DCHECK_EQ((*code)[literal_offset + 4], 0x21);
- DCHECK_EQ(((*code)[literal_offset + 5] & 0x07), 0x00);
- DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x00);
}
} else {
// instr reg(s), offset_low
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index d7e3a28777..a33081e033 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -62,6 +62,9 @@ namespace art {
namespace { // anonymous namespace
+// Whether to write dex layout info to the oat file.
+static constexpr bool kWriteDexLayoutInfo = true;
+
typedef DexFile::Header __attribute__((aligned(1))) UnalignedDexFileHeader;
const UnalignedDexFileHeader* AsUnalignedDexFileHeader(const uint8_t* raw_data) {
@@ -288,10 +291,14 @@ class OatWriter::OatDexFile {
uint32_t class_offsets_offset_;
uint32_t lookup_table_offset_;
uint32_t method_bss_mapping_offset_;
+ uint32_t dex_sections_layout_offset_;
// Data to write to a separate section.
dchecked_vector<uint32_t> class_offsets_;
+ // Dex section layout info to serialize.
+ DexLayoutSections dex_sections_layout_;
+
private:
DISALLOW_COPY_AND_ASSIGN(OatDexFile);
};
@@ -328,6 +335,7 @@ OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings, ProfileCo
bss_method_entries_(),
bss_type_entries_(),
bss_string_entries_(),
+ map_boot_image_tables_to_bss_(false),
oat_data_offset_(0u),
oat_header_(nullptr),
size_vdex_header_(0),
@@ -362,6 +370,9 @@ OatWriter::OatWriter(bool compiling_boot_image, TimingLogger* timings, ProfileCo
size_oat_dex_file_offset_(0),
size_oat_dex_file_class_offsets_offset_(0),
size_oat_dex_file_lookup_table_offset_(0),
+ size_oat_dex_file_dex_layout_sections_offset_(0),
+ size_oat_dex_file_dex_layout_sections_(0),
+ size_oat_dex_file_dex_layout_sections_alignment_(0),
size_oat_dex_file_method_bss_mapping_offset_(0),
size_oat_lookup_table_alignment_(0),
size_oat_lookup_table_(0),
@@ -571,11 +582,16 @@ bool OatWriter::WriteAndOpenDexFiles(
}
}
- // Write TypeLookupTables into OAT.
+ // Write type lookup tables into the oat file.
if (!WriteTypeLookupTables(&checksum_updating_rodata, dex_files)) {
return false;
}
+ // Write dex layout sections into the oat file.
+ if (!WriteDexLayoutSections(&checksum_updating_rodata, dex_files)) {
+ return false;
+ }
+
*opened_dex_files_map = std::move(dex_files_map);
*opened_dex_files = std::move(dex_files);
write_state_ = WriteState::kPrepareLayout;
@@ -756,6 +772,8 @@ class OatWriter::InitBssLayoutMethodVisitor : public DexMethodVisitor {
} else if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) {
StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex());
writer_->bss_string_entries_.Overwrite(ref, /* placeholder */ 0u);
+ } else if (patch.GetType() == LinkerPatch::Type::kStringInternTable) {
+ writer_->map_boot_image_tables_to_bss_ = true;
}
}
} else {
@@ -1383,6 +1401,14 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
target_offset);
break;
}
+ case LinkerPatch::Type::kStringInternTable: {
+ uint32_t target_offset = GetInternTableEntryOffset(patch);
+ writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+ patch,
+ offset_ + literal_offset,
+ target_offset);
+ break;
+ }
case LinkerPatch::Type::kStringBssEntry: {
StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex());
uint32_t target_offset =
@@ -1520,7 +1546,6 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
}
mirror::String* GetTargetString(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) {
- ScopedObjectAccessUnchecked soa(Thread::Current());
ClassLinker* linker = Runtime::Current()->GetClassLinker();
mirror::String* string = linker->LookupString(*patch.TargetStringDexFile(),
patch.TargetStringIndex(),
@@ -1588,6 +1613,28 @@ class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
data[2] = (address >> 16) & 0xffu;
data[3] = (address >> 24) & 0xffu;
}
+
+ // Calculate the offset of the InternTable slot (GcRoot<String>) when mmapped to the .bss.
+ uint32_t GetInternTableEntryOffset(const LinkerPatch& patch)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(!writer_->HasBootImage());
+ const uint8_t* string_root = writer_->LookupBootImageInternTableSlot(
+ *patch.TargetStringDexFile(), patch.TargetStringIndex());
+ DCHECK(string_root != nullptr);
+ uint32_t base_offset = writer_->bss_start_;
+ for (gc::space::ImageSpace* space : Runtime::Current()->GetHeap()->GetBootImageSpaces()) {
+ const uint8_t* const_tables_begin =
+ space->Begin() + space->GetImageHeader().GetBootImageConstantTablesOffset();
+ size_t offset = static_cast<size_t>(string_root - const_tables_begin);
+ if (offset < space->GetImageHeader().GetBootImageConstantTablesSize()) {
+ DCHECK_LE(base_offset + offset, writer_->bss_start_ + writer_->bss_methods_offset_);
+ return base_offset + offset;
+ }
+ base_offset += space->GetImageHeader().GetBootImageConstantTablesSize();
+ }
+ LOG(FATAL) << "Didn't find boot image string in boot image intern tables!";
+ UNREACHABLE();
+ }
};
class OatWriter::WriteMapMethodVisitor : public OatDexMethodVisitor {
@@ -1927,19 +1974,22 @@ void OatWriter::InitBssLayout(InstructionSet instruction_set) {
DCHECK_EQ(bss_size_, 0u);
if (HasBootImage()) {
+ DCHECK(!map_boot_image_tables_to_bss_);
DCHECK(bss_string_entries_.empty());
- if (bss_method_entries_.empty() && bss_type_entries_.empty()) {
- // Nothing to put to the .bss section.
- return;
- }
+ }
+ if (!map_boot_image_tables_to_bss_ &&
+ bss_method_entries_.empty() &&
+ bss_type_entries_.empty() &&
+ bss_string_entries_.empty()) {
+ // Nothing to put to the .bss section.
+ return;
}
- // Allocate space for app dex cache arrays in the .bss section.
+ // Allocate space for boot image tables in the .bss section.
PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set);
- if (!HasBootImage()) {
- for (const DexFile* dex_file : *dex_files_) {
- DexCacheArraysLayout layout(pointer_size, dex_file);
- bss_size_ += layout.Size();
+ if (map_boot_image_tables_to_bss_) {
+ for (gc::space::ImageSpace* space : Runtime::Current()->GetHeap()->GetBootImageSpaces()) {
+ bss_size_ += space->GetImageHeader().GetBootImageConstantTablesSize();
}
}
@@ -2320,6 +2370,9 @@ bool OatWriter::WriteCode(OutputStream* out) {
DO_STAT(size_oat_dex_file_offset_);
DO_STAT(size_oat_dex_file_class_offsets_offset_);
DO_STAT(size_oat_dex_file_lookup_table_offset_);
+ DO_STAT(size_oat_dex_file_dex_layout_sections_offset_);
+ DO_STAT(size_oat_dex_file_dex_layout_sections_);
+ DO_STAT(size_oat_dex_file_dex_layout_sections_alignment_);
DO_STAT(size_oat_dex_file_method_bss_mapping_offset_);
DO_STAT(size_oat_lookup_table_alignment_);
DO_STAT(size_oat_lookup_table_);
@@ -2775,7 +2828,12 @@ bool OatWriter::LayoutAndWriteDexFile(OutputStream* out, OatDexFile* oat_dex_fil
&error_msg);
} else if (oat_dex_file->source_.IsRawFile()) {
File* raw_file = oat_dex_file->source_.GetRawFile();
- dex_file = DexFile::OpenDex(raw_file->Fd(), location, /* verify_checksum */ true, &error_msg);
+ int dup_fd = dup(raw_file->Fd());
+ if (dup_fd < 0) {
+ PLOG(ERROR) << "Failed to dup dex file descriptor (" << raw_file->Fd() << ") at " << location;
+ return false;
+ }
+ dex_file = DexFile::OpenDex(dup_fd, location, /* verify_checksum */ true, &error_msg);
} else {
// The source data is a vdex file.
CHECK(oat_dex_file->source_.IsRawData())
@@ -2808,6 +2866,7 @@ bool OatWriter::LayoutAndWriteDexFile(OutputStream* out, OatDexFile* oat_dex_fil
if (!WriteDexFile(out, oat_dex_file, mem_map->Begin(), /* update_input_vdex */ false)) {
return false;
}
+ oat_dex_file->dex_sections_layout_ = dex_layout.GetSections();
// Set the checksum of the new oat dex file to be the original file's checksum.
oat_dex_file->dex_file_location_checksum_ = dex_file->GetLocationChecksum();
return true;
@@ -3153,6 +3212,70 @@ bool OatWriter::WriteTypeLookupTables(
return true;
}
+bool OatWriter::WriteDexLayoutSections(
+ OutputStream* oat_rodata,
+ const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files) {
+ TimingLogger::ScopedTiming split(__FUNCTION__, timings_);
+
+ if (!kWriteDexLayoutInfo) {
+    return true;
+ }
+
+ uint32_t expected_offset = oat_data_offset_ + oat_size_;
+ off_t actual_offset = oat_rodata->Seek(expected_offset, kSeekSet);
+ if (static_cast<uint32_t>(actual_offset) != expected_offset) {
+    PLOG(ERROR) << "Failed to seek to dex layout sections offset. Actual: " << actual_offset
+ << " Expected: " << expected_offset << " File: " << oat_rodata->GetLocation();
+ return false;
+ }
+
+ DCHECK_EQ(opened_dex_files.size(), oat_dex_files_.size());
+ size_t rodata_offset = oat_size_;
+ for (size_t i = 0, size = opened_dex_files.size(); i != size; ++i) {
+ OatDexFile* oat_dex_file = &oat_dex_files_[i];
+ DCHECK_EQ(oat_dex_file->dex_sections_layout_offset_, 0u);
+
+ // Write dex layout section alignment bytes.
+ const size_t padding_size =
+ RoundUp(rodata_offset, alignof(DexLayoutSections)) - rodata_offset;
+ if (padding_size != 0u) {
+ std::vector<uint8_t> buffer(padding_size, 0u);
+ if (!oat_rodata->WriteFully(buffer.data(), padding_size)) {
+      PLOG(ERROR) << "Failed to write dex layout section alignment padding."
+ << " File: " << oat_dex_file->GetLocation()
+ << " Output: " << oat_rodata->GetLocation();
+ return false;
+ }
+ size_oat_dex_file_dex_layout_sections_alignment_ += padding_size;
+ rodata_offset += padding_size;
+ }
+
+ DCHECK_ALIGNED(rodata_offset, alignof(DexLayoutSections));
+ DCHECK_EQ(oat_data_offset_ + rodata_offset,
+ static_cast<size_t>(oat_rodata->Seek(0u, kSeekCurrent)));
+ DCHECK(oat_dex_file != nullptr);
+ if (!oat_rodata->WriteFully(&oat_dex_file->dex_sections_layout_,
+ sizeof(oat_dex_file->dex_sections_layout_))) {
+ PLOG(ERROR) << "Failed to write dex layout sections."
+ << " File: " << oat_dex_file->GetLocation()
+ << " Output: " << oat_rodata->GetLocation();
+ return false;
+ }
+ oat_dex_file->dex_sections_layout_offset_ = rodata_offset;
+ size_oat_dex_file_dex_layout_sections_ += sizeof(oat_dex_file->dex_sections_layout_);
+ rodata_offset += sizeof(oat_dex_file->dex_sections_layout_);
+ }
+ oat_size_ = rodata_offset;
+
+ if (!oat_rodata->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after writing dex layout sections."
+ << " File: " << oat_rodata->GetLocation();
+ return false;
+ }
+
+ return true;
+}
+
bool OatWriter::WriteChecksumsAndVdexHeader(OutputStream* vdex_out) {
if (!kIsVdexEnabled) {
return true;
@@ -3252,6 +3375,7 @@ OatWriter::OatDexFile::OatDexFile(const char* dex_file_location,
class_offsets_offset_(0u),
lookup_table_offset_(0u),
method_bss_mapping_offset_(0u),
+ dex_sections_layout_offset_(0u),
class_offsets_() {
}
@@ -3262,7 +3386,8 @@ size_t OatWriter::OatDexFile::SizeOf() const {
+ sizeof(dex_file_offset_)
+ sizeof(class_offsets_offset_)
+ sizeof(lookup_table_offset_)
- + sizeof(method_bss_mapping_offset_);
+ + sizeof(method_bss_mapping_offset_)
+ + sizeof(dex_sections_layout_offset_);
}
bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) const {
@@ -3305,6 +3430,12 @@ bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) cons
}
oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_);
+ if (!out->WriteFully(&dex_sections_layout_offset_, sizeof(dex_sections_layout_offset_))) {
+ PLOG(ERROR) << "Failed to write dex section layout info to " << out->GetLocation();
+ return false;
+ }
+ oat_writer->size_oat_dex_file_dex_layout_sections_offset_ += sizeof(dex_sections_layout_offset_);
+
if (!out->WriteFully(&method_bss_mapping_offset_, sizeof(method_bss_mapping_offset_))) {
PLOG(ERROR) << "Failed to write method bss mapping offset to " << out->GetLocation();
return false;
@@ -3409,4 +3540,25 @@ bool OatWriter::OatClass::Write(OatWriter* oat_writer, OutputStream* out) const
return true;
}
+const uint8_t* OatWriter::LookupBootImageInternTableSlot(const DexFile& dex_file,
+ dex::StringIndex string_idx)
+ NO_THREAD_SAFETY_ANALYSIS {
+ // Single-threaded OatWriter can avoid locking.
+ uint32_t utf16_length;
+ const char* utf8_data = dex_file.StringDataAndUtf16LengthByIdx(string_idx, &utf16_length);
+ DCHECK_EQ(utf16_length, CountModifiedUtf8Chars(utf8_data));
+ InternTable::Utf8String string(utf16_length,
+ utf8_data,
+ ComputeUtf16HashFromModifiedUtf8(utf8_data, utf16_length));
+ const InternTable* intern_table = Runtime::Current()->GetClassLinker()->intern_table_;
+ for (const InternTable::Table::UnorderedSet& table : intern_table->strong_interns_.tables_) {
+ auto it = table.Find(string);
+ if (it != table.end()) {
+ return reinterpret_cast<const uint8_t*>(std::addressof(*it));
+ }
+ }
+ LOG(FATAL) << "Did not find boot image string " << utf8_data;
+ UNREACHABLE();
+}
+
} // namespace art
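kStringInternTable patches are resolved by GetInternTableEntryOffset(): the boot image spaces' constant tables are mapped back-to-back starting at bss_start_, so the patch target is bss_start_ plus the slot's cumulative offset. An illustrative layout (sizes and offsets invented):

    // Two boot image spaces with constant-table sizes 0x3000 and 0x1000; bss_start_ = 0x80000.
    //   slot at space 0's const_tables_begin + 0x120 -> target = 0x80000 + 0x120
    //   slot at space 1's const_tables_begin + 0x040 -> target = 0x80000 + 0x3000 + 0x040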
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 470d69edb3..780dee0bac 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -324,6 +324,8 @@ class OatWriter {
bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location);
bool WriteTypeLookupTables(OutputStream* oat_rodata,
const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
+ bool WriteDexLayoutSections(OutputStream* oat_rodata,
+ const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
bool WriteUpTo16BytesAlignment(OutputStream* out, uint32_t size, uint32_t* stat);
void SetMultiOatRelativePatcherAdjustment();
@@ -331,6 +333,10 @@ class OatWriter {
bool MayHaveCompiledMethods() const;
+ // Find the address of the GcRoot<String> in the InternTable for a boot image string.
+ const uint8_t* LookupBootImageInternTableSlot(const DexFile& dex_file,
+ dex::StringIndex string_idx);
+
enum class WriteState {
kAddingDexFileSources,
kPrepareLayout,
@@ -405,6 +411,10 @@ class OatWriter {
// is the target offset for patching, starting at `bss_start_ + bss_roots_offset_`.
SafeMap<StringReference, size_t, StringReferenceValueComparator> bss_string_entries_;
+ // Whether boot image tables should be mapped to the .bss. This is needed for compiled
+ // code that reads from these tables with PC-relative instructions.
+ bool map_boot_image_tables_to_bss_;
+
// Offset of the oat data from the start of the mmapped region of the elf file.
size_t oat_data_offset_;
@@ -455,6 +465,9 @@ class OatWriter {
uint32_t size_oat_dex_file_offset_;
uint32_t size_oat_dex_file_class_offsets_offset_;
uint32_t size_oat_dex_file_lookup_table_offset_;
+ uint32_t size_oat_dex_file_dex_layout_sections_offset_;
+ uint32_t size_oat_dex_file_dex_layout_sections_;
+ uint32_t size_oat_dex_file_dex_layout_sections_alignment_;
uint32_t size_oat_dex_file_method_bss_mapping_offset_;
uint32_t size_oat_lookup_table_alignment_;
uint32_t size_oat_lookup_table_;
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index c166deb406..2f96cfa382 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -1121,6 +1121,66 @@ class BCEVisitor : public HGraphVisitor {
}
}
+ void VisitRem(HRem* instruction) OVERRIDE {
+ HInstruction* left = instruction->GetLeft();
+ HInstruction* right = instruction->GetRight();
+
+    // Handle expressions of the form 'i % CONST' in an array index, e.g.:
+    //   array[i % 20];
+ if (right->IsIntConstant()) {
+ int32_t right_const = std::abs(right->AsIntConstant()->GetValue());
+ if (right_const == 0) {
+ return;
+ }
+      // The sign of the divisor CONST doesn't affect the sign of the final value range.
+ // For example:
+ // if (i > 0) {
+ // array[i % 10]; // index value range [0, 9]
+ // array[i % -10]; // index value range [0, 9]
+ // }
+ ValueRange* right_range = new (GetGraph()->GetArena()) ValueRange(
+ GetGraph()->GetArena(),
+ ValueBound(nullptr, 1 - right_const),
+ ValueBound(nullptr, right_const - 1));
+
+ ValueRange* left_range = LookupValueRange(left, left->GetBlock());
+ if (left_range != nullptr) {
+ right_range = left_range->Narrow(right_range);
+ }
+ AssignRange(instruction->GetBlock(), instruction, right_range);
+ return;
+ }
+
+ // Handle following pattern:
+ // i0 NullCheck
+ // i1 ArrayLength[i0]
+ // i2 DivByZeroCheck [i1] <-- right
+ // i3 Rem [i5, i2] <-- we are here.
+ // i4 BoundsCheck [i3,i1]
+ if (right->IsDivZeroCheck()) {
+      // If array_length passes the div-by-zero check, array_length must be > 0.
+ right = right->AsDivZeroCheck()->InputAt(0);
+ }
+
+    // Handle expressions of the form 'i % array.length' in an array index, e.g.:
+    //   array[(i+7) % array.length];
+ if (right->IsArrayLength()) {
+ ValueBound lower = ValueBound::Min(); // ideally, lower should be '1-array_length'.
+ ValueBound upper = ValueBound(right, -1); // array_length - 1
+ ValueRange* right_range = new (GetGraph()->GetArena()) ValueRange(
+ GetGraph()->GetArena(),
+ lower,
+ upper);
+ ValueRange* left_range = LookupValueRange(left, left->GetBlock());
+ if (left_range != nullptr) {
+ right_range = left_range->Narrow(right_range);
+ }
+ AssignRange(instruction->GetBlock(), instruction, right_range);
+ return;
+ }
+ }
+
void VisitNewArray(HNewArray* new_array) OVERRIDE {
HInstruction* len = new_array->GetLength();
if (!len->IsIntConstant()) {
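The value ranges assigned by VisitRem() above are what the new test below exercises; for a loop counter i known to lie in [0, 199]:

    // i % 10  : initial range [1-10, 10-1] = [-9, 9], narrowed with [0, 199] -> [0, 9]
    //           => the bounds check against length 10 is removed.
    // i % -10 : the divisor's absolute value is used, so the range is again [0, 9]; removable.
    // i % 200 : [-199, 199] narrowed with [0, 199] -> [0, 199]
    //           => cannot be removed for length 10.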
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 575e2fc24a..2aaf05833c 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -951,4 +951,152 @@ TEST_F(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) {
ASSERT_TRUE(IsRemoved(bounds_check6));
}
+// int[] array = new int[10];
+// for (int i=0; i<200; i++) {
+// array[i%10] = 10; // Can eliminate
+// array[i%1] = 10; // Can eliminate
+// array[i%200] = 10; // Cannot eliminate
+// array[i%-10] = 10; // Can eliminate
+// array[i%array.length] = 10; // Can eliminate
+//   array[param_i%10] = 10;           // Can't eliminate, as param_i may be < 0
+//   array[param_i%array.length] = 10; // Can't eliminate, as param_i may be < 0
+// }
+TEST_F(BoundsCheckEliminationTest, ModArrayBoundsElimination) {
+ HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(entry);
+ graph_->SetEntryBlock(entry);
+ HInstruction* param_i = new (&allocator_)
+ HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);
+ entry->AddInstruction(param_i);
+
+ HInstruction* constant_0 = graph_->GetIntConstant(0);
+ HInstruction* constant_1 = graph_->GetIntConstant(1);
+ HInstruction* constant_10 = graph_->GetIntConstant(10);
+ HInstruction* constant_200 = graph_->GetIntConstant(200);
+ HInstruction* constant_minus_10 = graph_->GetIntConstant(-10);
+
+ HBasicBlock* block = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(block);
+ entry->AddSuccessor(block);
+ // We pass a bogus constant for the class to avoid mocking one.
+ HInstruction* new_array = new (&allocator_) HNewArray(constant_10, constant_10, 0);
+ block->AddInstruction(new_array);
+ block->AddInstruction(new (&allocator_) HGoto());
+
+ HBasicBlock* loop_header = new (&allocator_) HBasicBlock(graph_);
+ HBasicBlock* loop_body = new (&allocator_) HBasicBlock(graph_);
+ HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_);
+
+ graph_->AddBlock(loop_header);
+ graph_->AddBlock(loop_body);
+ graph_->AddBlock(exit);
+ block->AddSuccessor(loop_header);
+ loop_header->AddSuccessor(exit); // true successor
+ loop_header->AddSuccessor(loop_body); // false successor
+ loop_body->AddSuccessor(loop_header);
+
+ HPhi* phi = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt);
+ HInstruction* cmp = new (&allocator_) HGreaterThanOrEqual(phi, constant_200);
+ HInstruction* if_inst = new (&allocator_) HIf(cmp);
+ loop_header->AddPhi(phi);
+ loop_header->AddInstruction(cmp);
+ loop_header->AddInstruction(if_inst);
+ phi->AddInput(constant_0);
+
+ //////////////////////////////////////////////////////////////////////////////////
+ // LOOP BODY:
+ // array[i % 10] = 10;
+ HRem* i_mod_10 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_10, 0);
+ HBoundsCheck* bounds_check_i_mod_10 = new (&allocator_) HBoundsCheck(i_mod_10, constant_10, 0);
+ HInstruction* array_set = new (&allocator_) HArraySet(
+ new_array, bounds_check_i_mod_10, constant_10, Primitive::kPrimInt, 0);
+ loop_body->AddInstruction(i_mod_10);
+ loop_body->AddInstruction(bounds_check_i_mod_10);
+ loop_body->AddInstruction(array_set);
+
+ // array[i % 1] = 10;
+ HRem* i_mod_1 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_1, 0);
+ HBoundsCheck* bounds_check_i_mod_1 = new (&allocator_) HBoundsCheck(i_mod_1, constant_10, 0);
+ array_set = new (&allocator_) HArraySet(
+ new_array, bounds_check_i_mod_1, constant_10, Primitive::kPrimInt, 0);
+ loop_body->AddInstruction(i_mod_1);
+ loop_body->AddInstruction(bounds_check_i_mod_1);
+ loop_body->AddInstruction(array_set);
+
+ // array[i % 200] = 10;
+  HRem* i_mod_200 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_200, 0);
+ HBoundsCheck* bounds_check_i_mod_200 = new (&allocator_) HBoundsCheck(i_mod_200, constant_10, 0);
+ array_set = new (&allocator_) HArraySet(
+ new_array, bounds_check_i_mod_200, constant_10, Primitive::kPrimInt, 0);
+ loop_body->AddInstruction(i_mod_200);
+ loop_body->AddInstruction(bounds_check_i_mod_200);
+ loop_body->AddInstruction(array_set);
+
+ // array[i % -10] = 10;
+ HRem* i_mod_minus_10 = new (&allocator_) HRem(Primitive::kPrimInt, phi, constant_minus_10, 0);
+ HBoundsCheck* bounds_check_i_mod_minus_10 = new (&allocator_) HBoundsCheck(
+ i_mod_minus_10, constant_10, 0);
+ array_set = new (&allocator_) HArraySet(
+ new_array, bounds_check_i_mod_minus_10, constant_10, Primitive::kPrimInt, 0);
+ loop_body->AddInstruction(i_mod_minus_10);
+ loop_body->AddInstruction(bounds_check_i_mod_minus_10);
+ loop_body->AddInstruction(array_set);
+
+ // array[i%array.length] = 10;
+ HNullCheck* null_check = new (&allocator_) HNullCheck(new_array, 0);
+ HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0);
+ HRem* i_mod_array_length = new (&allocator_) HRem(Primitive::kPrimInt, phi, array_length, 0);
+ HBoundsCheck* bounds_check_i_mod_array_len = new (&allocator_) HBoundsCheck(
+ i_mod_array_length, array_length, 0);
+ array_set = new (&allocator_) HArraySet(
+ null_check, bounds_check_i_mod_array_len, constant_10, Primitive::kPrimInt, 0);
+ loop_body->AddInstruction(null_check);
+ loop_body->AddInstruction(array_length);
+ loop_body->AddInstruction(i_mod_array_length);
+ loop_body->AddInstruction(bounds_check_i_mod_array_len);
+ loop_body->AddInstruction(array_set);
+
+ // array[param_i % 10] = 10;
+ HRem* param_i_mod_10 = new (&allocator_) HRem(Primitive::kPrimInt, param_i, constant_10, 0);
+ HBoundsCheck* bounds_check_param_i_mod_10 = new (&allocator_) HBoundsCheck(
+ param_i_mod_10, constant_10, 0);
+ array_set = new (&allocator_) HArraySet(
+ new_array, bounds_check_param_i_mod_10, constant_10, Primitive::kPrimInt, 0);
+ loop_body->AddInstruction(param_i_mod_10);
+ loop_body->AddInstruction(bounds_check_param_i_mod_10);
+ loop_body->AddInstruction(array_set);
+
+ // array[param_i%array.length] = 10;
+ null_check = new (&allocator_) HNullCheck(new_array, 0);
+ array_length = new (&allocator_) HArrayLength(null_check, 0);
+ HRem* param_i_mod_array_length = new (&allocator_) HRem(
+ Primitive::kPrimInt, param_i, array_length, 0);
+ HBoundsCheck* bounds_check_param_i_mod_array_len = new (&allocator_) HBoundsCheck(
+ param_i_mod_array_length, array_length, 0);
+ array_set = new (&allocator_) HArraySet(
+ null_check, bounds_check_param_i_mod_array_len, constant_10, Primitive::kPrimInt, 0);
+ loop_body->AddInstruction(null_check);
+ loop_body->AddInstruction(array_length);
+ loop_body->AddInstruction(param_i_mod_array_length);
+ loop_body->AddInstruction(bounds_check_param_i_mod_array_len);
+ loop_body->AddInstruction(array_set);
+
+ // i++;
+ HInstruction* add = new (&allocator_) HAdd(Primitive::kPrimInt, phi, constant_1);
+ loop_body->AddInstruction(add);
+ loop_body->AddInstruction(new (&allocator_) HGoto());
+ phi->AddInput(add);
+ //////////////////////////////////////////////////////////////////////////////////
+
+ exit->AddInstruction(new (&allocator_) HExit());
+
+ RunBCE();
+
+ ASSERT_TRUE(IsRemoved(bounds_check_i_mod_10));
+ ASSERT_TRUE(IsRemoved(bounds_check_i_mod_1));
+  ASSERT_FALSE(IsRemoved(bounds_check_i_mod_200));
+ ASSERT_TRUE(IsRemoved(bounds_check_i_mod_minus_10));
+ ASSERT_TRUE(IsRemoved(bounds_check_i_mod_array_len));
+ ASSERT_FALSE(IsRemoved(bounds_check_param_i_mod_10));
+}
+
} // namespace art
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 4999950600..1b628688ec 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -435,11 +435,11 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
// The string entry page address was preserved in temp_ thanks to kSaveEverything.
} else {
// For non-Baker read barrier, we need to re-calculate the address of the string entry page.
- adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
+ adrp_label_ = arm64_codegen->NewStringBssEntryPatch(dex_file, string_index);
arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_);
}
vixl::aarch64::Label* strp_label =
- arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_);
+ arm64_codegen->NewStringBssEntryPatch(dex_file, string_index, adrp_label_);
{
SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
__ Bind(strp_label);
@@ -1463,6 +1463,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -1595,6 +1596,8 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
__ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
}
}
+
+ MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
void CodeGeneratorARM64::GenerateFrameExit() {
@@ -3587,6 +3590,7 @@ void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* s
}
if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
if (!codegen_->GoesToNextBlock(block, successor)) {
__ B(codegen_->GetLabelOf(successor));
@@ -4391,6 +4395,7 @@ void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
@@ -4459,6 +4464,8 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
+
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -4626,6 +4633,7 @@ void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
codegen_->GenerateInvokePolymorphicCall(invoke);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch(
@@ -4668,6 +4676,13 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
return NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
}
+vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
+ const DexFile& dex_file,
+ dex::StringIndex string_index,
+ vixl::aarch64::Label* adrp_label) {
+ return NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
+}
+
vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
baker_read_barrier_patches_.emplace_back(custom_data);
return &baker_read_barrier_patches_.back().label;
@@ -4757,6 +4772,7 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
pc_relative_type_patches_.size() +
type_bss_entry_patches_.size() +
pc_relative_string_patches_.size() +
+ string_bss_entry_patches_.size() +
baker_read_barrier_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
@@ -4769,13 +4785,15 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
} else {
DCHECK(pc_relative_method_patches_.empty());
DCHECK(pc_relative_type_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_,
+ linker_patches);
}
EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
linker_patches);
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
+ linker_patches);
for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
info.custom_data));
@@ -4801,27 +4819,37 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir
DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
return;
}
- // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
- // are no pools emitted.
- EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
- LocationSummary* locations = invoke->GetLocations();
- codegen_->GenerateStaticOrDirectCall(
- invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
+ {
+ // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
+ // are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
+ LocationSummary* locations = invoke->GetLocations();
+ codegen_->GenerateStaticOrDirectCall(
+ invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
+ }
+
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
return;
}
- // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
- // are no pools emitted.
- EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
- codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
- DCHECK(!codegen_->IsLeafMethod());
+ {
+ // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
+ // are no pools emitted.
+ EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
+ codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
+ DCHECK(!codegen_->IsLeafMethod());
+ }
+
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
@@ -4895,6 +4923,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
codegen_->GenerateLoadClassRuntimeCall(cls);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
return;
}
DCHECK(!cls->NeedsAccessCheck());
@@ -4995,6 +5024,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
} else {
__ Bind(slow_path->GetExitLabel());
}
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
}
@@ -5024,6 +5054,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBootImageInternTable:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -5071,24 +5102,37 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
switch (load->GetLoadKind()) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
+ DCHECK(codegen_->GetCompilerOptions().IsBootImage());
// Add ADRP with its PC-relative String patch.
const DexFile& dex_file = load->GetDexFile();
const dex::StringIndex string_index = load->GetStringIndex();
- DCHECK(codegen_->GetCompilerOptions().IsBootImage());
vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
// Add ADD with its PC-relative String patch.
vixl::aarch64::Label* add_label =
codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
- return; // No dex cache slow path.
+ return;
}
case HLoadString::LoadKind::kBootImageAddress: {
uint32_t address = dchecked_integral_cast<uint32_t>(
reinterpret_cast<uintptr_t>(load->GetString().Get()));
DCHECK_NE(address, 0u);
__ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
- return; // No dex cache slow path.
+ return;
+ }
+ case HLoadString::LoadKind::kBootImageInternTable: {
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ // Add ADRP with its PC-relative String patch.
+ const DexFile& dex_file = load->GetDexFile();
+ const dex::StringIndex string_index = load->GetStringIndex();
+ vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
+ codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
+ // Add LDR with its PC-relative String patch.
+ vixl::aarch64::Label* ldr_label =
+ codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
+ codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
+ return;
}
case HLoadString::LoadKind::kBssEntry: {
// Add ADRP with its PC-relative String .bss entry patch.
@@ -5096,11 +5140,11 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
const dex::StringIndex string_index = load->GetStringIndex();
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
Register temp = XRegisterFrom(load->GetLocations()->GetTemp(0));
- vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
+ vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
codegen_->EmitAdrpPlaceholder(adrp_label, temp);
- // Add LDR with its PC-relative String patch.
+ // Add LDR with its .bss entry String patch.
vixl::aarch64::Label* ldr_label =
- codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
+ codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
// /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */
GenerateGcRootFieldLoad(load,
out_loc,
@@ -5113,6 +5157,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
codegen_->AddSlowPath(slow_path);
__ Cbz(out.X(), slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
return;
}
case HLoadString::LoadKind::kJitTableAddress: {
@@ -5137,6 +5182,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
__ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
@@ -5164,6 +5210,7 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins
} else {
CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
}
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
void LocationsBuilderARM64::VisitMul(HMul* mul) {
@@ -5260,6 +5307,7 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
@@ -5296,6 +5344,7 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction)
codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
}
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
void LocationsBuilderARM64::VisitNot(HNot* instruction) {
@@ -5644,6 +5693,7 @@ void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction
return;
}
GenerateSuspendCheck(instruction, nullptr);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
@@ -6021,6 +6071,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
// Note that GC roots are not affected by heap poisoning, thus we
// do not have to unpoison `root_reg` here.
}
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -6074,22 +6125,25 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins
obj.GetCode());
vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
- EmissionCheckScope guard(GetVIXLAssembler(),
- (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
- vixl::aarch64::Label return_address;
- __ adr(lr, &return_address);
- __ Bind(cbnz_label);
- __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 instruction (4B) before the return address label; "
- " 2 instructions (8B) for heap poisoning.");
- Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
- __ ldr(ref_reg, MemOperand(base.X(), offset));
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
+ {
+ EmissionCheckScope guard(GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ __ Bind(cbnz_label);
+ __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
+ static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Field LDR must be 1 instruction (4B) before the return address label; "
+ " 2 instructions (8B) for heap poisoning.");
+ Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot);
+ __ ldr(ref_reg, MemOperand(base.X(), offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
}
- GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
- __ Bind(&return_address);
+ MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
return;
}
@@ -6158,19 +6212,22 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins
vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
__ Add(temp.X(), obj.X(), Operand(data_offset));
- EmissionCheckScope guard(GetVIXLAssembler(),
- (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
- vixl::aarch64::Label return_address;
- __ adr(lr, &return_address);
- __ Bind(cbnz_label);
- __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
- static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Array LDR must be 1 instruction (4B) before the return address label; "
- " 2 instructions (8B) for heap poisoning.");
- __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
- DCHECK(!needs_null_check); // The thunk cannot handle the null check.
- GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
- __ Bind(&return_address);
+ {
+ EmissionCheckScope guard(GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ __ Bind(cbnz_label);
+ __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
+ static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Array LDR must be 1 instruction (4B) before the return address label; "
+ " 2 instructions (8B) for heap poisoning.");
+ __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ __ Bind(&return_address);
+ }
+ MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
return;
}
@@ -6247,6 +6304,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
GenerateRawReferenceLoad(
instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
__ Bind(slow_path->GetExitLabel());
+ MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
@@ -6303,6 +6361,7 @@ void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction*
// Fast path: the GC is not marking: nothing to do (the field is
// up-to-date, and we don't need to load the reference).
__ Bind(slow_path->GetExitLabel());
+ MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
}
void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
@@ -6381,6 +6440,19 @@ void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
}
+void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
+ // The following condition is a compile-time one, so it does not have a run-time cost.
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
+ // The following condition is a run-time one; it is executed after the
+ // previous compile-time test, to avoid penalizing non-debug builds.
+ if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
+ GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
+ }
+ }
+}
+
void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
Location out,
Location ref,
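As a hedged sketch of the run-time semantics behind MaybeGenerateMarkingRegisterCheck (assumed behavior in plain C++, not the emitted AArch64 code): the reserved marking register must mirror the thread-local GC-marking flag, and a mismatch traps with `code` identifying the emission site.

#include <cstdint>
#include <cstdlib>

struct Thread { uint32_t is_gc_marking; };  // hypothetical stand-in

void MarkingRegisterCheck(const Thread* self, uint32_t mr, int code) {
  if (mr != self->is_gc_marking) {
    (void)code;   // the generated code executes BRK #code on ARM64
    std::abort();
  }
}

int main() {
  Thread t{1u};
  MarkingRegisterCheck(&t, 1u, __LINE__);  // in sync: no trap
  return EXIT_SUCCESS;
}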
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 584eead81b..69c511907e 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -599,6 +599,14 @@ class CodeGeneratorARM64 : public CodeGenerator {
dex::StringIndex string_index,
vixl::aarch64::Label* adrp_label = nullptr);
+ // Add a new .bss entry string patch for an instruction and return the label
+ // to be bound before the instruction. The instruction will be either the
+ // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing
+ // to the associated ADRP patch label).
+ vixl::aarch64::Label* NewStringBssEntryPatch(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ vixl::aarch64::Label* adrp_label = nullptr);
+
// Add a new baker read barrier patch and return the label to be bound
// before the CBNZ instruction.
vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
@@ -687,6 +695,22 @@ class CodeGeneratorARM64 : public CodeGenerator {
bool needs_null_check,
bool use_load_acquire);
+ // Emit code checking the status of the Marking Register, and
+ // aborting the program if MR does not match the value stored in the
+ // art::Thread object. Code is only emitted in debug mode and if
+ // CompilerOptions::EmitRunTimeChecksInDebugMode returns true.
+ //
+ // Argument `code` is used to identify the different occurrences of
+ // MaybeGenerateMarkingRegisterCheck in the code generator, and is
+ // passed to the BRK instruction.
+ //
+ // If `temp_loc` is a valid location, it is expected to be a
+ // register and will be used as a temporary to generate code;
+ // otherwise, a temporary will be fetched from the core register
+ // scratch pool.
+ virtual void MaybeGenerateMarkingRegisterCheck(int code,
+ Location temp_loc = Location::NoLocation());
+
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
//
@@ -809,8 +833,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+ // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+ // PC-relative String patch info for kBssEntry.
+ ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
// Baker read barrier patch info.
ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
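The PC-relative string kinds above differ only in the second patched instruction: ADRP+ADD yields the object address directly (boot image), while ADRP+LDR loads it through an intern-table or .bss slot. A small self-contained sketch of the underlying address arithmetic (illustrative values, not ART code):

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t pc = 0x10002000;      // address of the ADRP instruction
  const uint64_t target = 0x76543abc;  // patched-in string address or slot
  // The linker writes the page delta into ADRP and the low 12 bits into the
  // following ADD (direct address) or into the LDR offset (table/.bss slot).
  int64_t page_delta = static_cast<int64_t>((target >> 12) - (pc >> 12));
  uint64_t adrp_result = (pc & ~UINT64_C(0xfff)) +
                         (static_cast<uint64_t>(page_delta) << 12);
  uint64_t lo12 = target & 0xfff;
  assert(adrp_result + lo12 == target);
  return 0;
}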
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 430cdde1f7..8288141954 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -94,6 +94,9 @@ constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
// The reserved entrypoint register for link-time generated thunks.
const vixl32::Register kBakerCcEntrypointRegister = r4;
+// Using a base helps identify when we hit Marking Register check breakpoints.
+constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
+
#ifdef __
#error "ARM Codegen VIXL macro-assembler macro already defined."
#endif
@@ -595,7 +598,7 @@ class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
- arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+ arm_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index);
arm_codegen->EmitMovwMovtPlaceholder(labels, temp);
__ Str(r0, MemOperand(temp));
}
@@ -1872,15 +1875,26 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
case kCondBE:
case kCondA:
case kCondAE: {
+ const uint32_t value_low = Low32Bits(value);
+ Operand operand_low(value_low);
+
__ Cmp(left_high, High32Bits(value));
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we must ensure that the operands corresponding to the least significant
+ // halves of the inputs fit into a 16-bit CMP encoding.
+ if (!left_low.IsLow() || !IsUint<8>(value_low)) {
+ operand_low = Operand(temps.Acquire());
+ __ Mov(LeaveFlags, operand_low.GetBaseRegister(), value_low);
+ }
+
// We use the scope because of the IT block that follows.
ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
2 * vixl32::k16BitT32InstructionSizeInBytes,
CodeBufferCheckScope::kExactSize);
__ it(eq);
- __ cmp(eq, left_low, Low32Bits(value));
+ __ cmp(eq, left_low, operand_low);
ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
break;
}
@@ -2022,46 +2036,7 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition*
return ret;
}
-static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) {
- if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
- const LocationSummary* const locations = condition->GetLocations();
-
- if (locations->InAt(1).IsConstant()) {
- IfCondition c = condition->GetCondition();
- IfCondition opposite = condition->GetOppositeCondition();
- const int64_t value =
- AdjustConstantForCondition(Int64ConstantFrom(locations->InAt(1)), &c, &opposite);
-
- if (c < kCondLT || c > kCondGE) {
- // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
- // we check that the least significant half of the first input to be compared
- // is in a low register (the other half is read outside an IT block), and
- // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
- // encoding can be used; 0 is always handled, no matter what registers are
- // used by the first input.
- if (value != 0 &&
- (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value)))) {
- return false;
- }
- // TODO(VIXL): The rest of the checks are there to keep the backend in sync with
- // the previous one, but are not strictly necessary.
- } else if (c == kCondLE || c == kCondGT) {
- if (value < std::numeric_limits<int64_t>::max() &&
- !assembler->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) {
- return false;
- }
- } else if (!assembler->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) {
- return false;
- }
- }
- }
-
- return true;
-}
-
static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
- DCHECK(CanGenerateTest(cond, codegen->GetAssembler()));
-
const vixl32::Register out = OutputRegister(cond);
const auto condition = GenerateTest(cond, false, codegen);
@@ -2144,91 +2119,6 @@ static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
}
}
-static void GenerateLongComparesAndJumps(HCondition* cond,
- vixl32::Label* true_label,
- vixl32::Label* false_label,
- CodeGeneratorARMVIXL* codegen,
- bool is_far_target = true) {
- LocationSummary* locations = cond->GetLocations();
- Location left = locations->InAt(0);
- Location right = locations->InAt(1);
- IfCondition if_cond = cond->GetCondition();
-
- vixl32::Register left_high = HighRegisterFrom(left);
- vixl32::Register left_low = LowRegisterFrom(left);
- IfCondition true_high_cond = if_cond;
- IfCondition false_high_cond = cond->GetOppositeCondition();
- vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part
-
- // Set the conditions for the test, remembering that == needs to be
- // decided using the low words.
- switch (if_cond) {
- case kCondEQ:
- case kCondNE:
- // Nothing to do.
- break;
- case kCondLT:
- false_high_cond = kCondGT;
- break;
- case kCondLE:
- true_high_cond = kCondLT;
- break;
- case kCondGT:
- false_high_cond = kCondLT;
- break;
- case kCondGE:
- true_high_cond = kCondGT;
- break;
- case kCondB:
- false_high_cond = kCondA;
- break;
- case kCondBE:
- true_high_cond = kCondB;
- break;
- case kCondA:
- false_high_cond = kCondB;
- break;
- case kCondAE:
- true_high_cond = kCondA;
- break;
- }
- if (right.IsConstant()) {
- int64_t value = Int64ConstantFrom(right);
- int32_t val_low = Low32Bits(value);
- int32_t val_high = High32Bits(value);
-
- __ Cmp(left_high, val_high);
- if (if_cond == kCondNE) {
- __ B(ARMCondition(true_high_cond), true_label, is_far_target);
- } else if (if_cond == kCondEQ) {
- __ B(ARMCondition(false_high_cond), false_label, is_far_target);
- } else {
- __ B(ARMCondition(true_high_cond), true_label, is_far_target);
- __ B(ARMCondition(false_high_cond), false_label, is_far_target);
- }
- // Must be equal high, so compare the lows.
- __ Cmp(left_low, val_low);
- } else {
- vixl32::Register right_high = HighRegisterFrom(right);
- vixl32::Register right_low = LowRegisterFrom(right);
-
- __ Cmp(left_high, right_high);
- if (if_cond == kCondNE) {
- __ B(ARMCondition(true_high_cond), true_label, is_far_target);
- } else if (if_cond == kCondEQ) {
- __ B(ARMCondition(false_high_cond), false_label, is_far_target);
- } else {
- __ B(ARMCondition(true_high_cond), true_label, is_far_target);
- __ B(ARMCondition(false_high_cond), false_label, is_far_target);
- }
- // Must be equal high, so compare the lows.
- __ Cmp(left_low, right_low);
- }
- // The last comparison might be unsigned.
- // TODO: optimize cases where this is always true/false
- __ B(final_condition, true_label, is_far_target);
-}
-
static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
@@ -2283,38 +2173,14 @@ static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codege
}
}
- if ((condition == kCondEQ || condition == kCondNE) &&
- // If `out` is a low register, then the GenerateConditionGeneric()
- // function generates a shorter code sequence that is still branchless.
- (!out.IsLow() || !CanGenerateTest(cond, codegen->GetAssembler()))) {
+ // If `out` is a low register, then the GenerateConditionGeneric()
+ // function generates a shorter code sequence that is still branchless.
+ if ((condition == kCondEQ || condition == kCondNE) && !out.IsLow()) {
GenerateEqualLong(cond, codegen);
return;
}
- if (CanGenerateTest(cond, codegen->GetAssembler())) {
- GenerateConditionGeneric(cond, codegen);
- return;
- }
-
- // Convert the jumps into the result.
- vixl32::Label done_label;
- vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
- vixl32::Label true_label, false_label;
-
- GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen, /* is_far_target */ false);
-
- // False case: result = 0.
- __ Bind(&false_label);
- __ Mov(out, 0);
- __ B(final_label);
-
- // True case: result = 1.
- __ Bind(&true_label);
- __ Mov(out, 1);
-
- if (done_label.IsReferenced()) {
- __ Bind(&done_label);
- }
+ GenerateConditionGeneric(cond, codegen);
}
static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond,
@@ -2514,6 +2380,7 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -2690,6 +2557,8 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
__ Mov(temp, 0);
GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
}
+
+ MaybeGenerateMarkingRegisterCheck(/* code */ 1);
}
void CodeGeneratorARMVIXL::GenerateFrameExit() {
@@ -2938,6 +2807,7 @@ void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock*
}
if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 2);
}
if (!codegen_->GoesToNextBlock(block, successor)) {
__ B(codegen_->GetLabelOf(successor));
@@ -2971,56 +2841,41 @@ void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
}
void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
- vixl32::Label* true_target_in,
- vixl32::Label* false_target_in,
+ vixl32::Label* true_target,
+ vixl32::Label* false_target,
bool is_far_target) {
- if (CanGenerateTest(condition, codegen_->GetAssembler())) {
- vixl32::Label* non_fallthrough_target;
- bool invert;
- bool emit_both_branches;
-
- if (true_target_in == nullptr) {
- // The true target is fallthrough.
- DCHECK(false_target_in != nullptr);
- non_fallthrough_target = false_target_in;
- invert = true;
- emit_both_branches = false;
- } else {
- non_fallthrough_target = true_target_in;
- invert = false;
- // Either the false target is fallthrough, or there is no fallthrough
- // and both branches must be emitted.
- emit_both_branches = (false_target_in != nullptr);
- }
-
- const auto cond = GenerateTest(condition, invert, codegen_);
-
- __ B(cond.first, non_fallthrough_target, is_far_target);
+ if (true_target == false_target) {
+ DCHECK(true_target != nullptr);
+ __ B(true_target);
+ return;
+ }
- if (emit_both_branches) {
- // No target falls through, we need to branch.
- __ B(false_target_in);
- }
+ vixl32::Label* non_fallthrough_target;
+ bool invert;
+ bool emit_both_branches;
- return;
+ if (true_target == nullptr) {
+ // The true target is fallthrough.
+ DCHECK(false_target != nullptr);
+ non_fallthrough_target = false_target;
+ invert = true;
+ emit_both_branches = false;
+ } else {
+ non_fallthrough_target = true_target;
+ invert = false;
+ // Either the false target is fallthrough, or there is no fallthrough
+ // and both branches must be emitted.
+ emit_both_branches = (false_target != nullptr);
}
- // Generated branching requires both targets to be explicit. If either of the
- // targets is nullptr (fallthrough) use and bind `fallthrough` instead.
- vixl32::Label fallthrough;
- vixl32::Label* true_target = (true_target_in == nullptr) ? &fallthrough : true_target_in;
- vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in;
+ const auto cond = GenerateTest(condition, invert, codegen_);
- DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
- GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_, is_far_target);
+ __ B(cond.first, non_fallthrough_target, is_far_target);
- if (false_target != &fallthrough) {
+ if (emit_both_branches) {
+ // No target falls through, we need to branch.
__ B(false_target);
}
-
- if (fallthrough.IsReferenced()) {
- __ Bind(&fallthrough);
- }
}
void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction,
@@ -3215,9 +3070,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
return;
}
- if (!Primitive::IsFloatingPointType(type) &&
- (IsBooleanValueOrMaterializedCondition(condition) ||
- CanGenerateTest(condition->AsCondition(), codegen_->GetAssembler()))) {
+ if (!Primitive::IsFloatingPointType(type)) {
bool invert = false;
if (out.Equals(second)) {
@@ -3655,6 +3508,7 @@ void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 3);
}
void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
@@ -3685,12 +3539,15 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrD
DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 4);
return;
}
LocationSummary* locations = invoke->GetLocations();
codegen_->GenerateStaticOrDirectCall(
invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
+
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 5);
}
void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
@@ -3709,11 +3566,14 @@ void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 6);
return;
}
codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
DCHECK(!codegen_->IsLeafMethod());
+
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 7);
}
void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
@@ -3790,6 +3650,8 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* inv
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
DCHECK(!codegen_->IsLeafMethod());
}
+
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 8);
}
void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
@@ -3798,6 +3660,7 @@ void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke)
void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
codegen_->GenerateInvokePolymorphicCall(invoke);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 9);
}
void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
@@ -5329,6 +5192,7 @@ void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction
codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
}
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 10);
}
void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
@@ -5348,6 +5212,7 @@ void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
DCHECK(!codegen_->IsLeafMethod());
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 11);
}
void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
@@ -6965,6 +6830,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instructi
return;
}
GenerateSuspendCheck(instruction, nullptr);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 12);
}
void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
@@ -7326,6 +7192,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
codegen_->GenerateLoadClassRuntimeCall(cls);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 13);
return;
}
DCHECK(!cls->NeedsAccessCheck());
@@ -7405,6 +7272,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
} else {
__ Bind(slow_path->GetExitLabel());
}
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 14);
}
}
@@ -7448,6 +7316,7 @@ HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBootImageInternTable:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -7505,14 +7374,22 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
codegen_->EmitMovwMovtPlaceholder(labels, out);
- return; // No dex cache slow path.
+ return;
}
case HLoadString::LoadKind::kBootImageAddress: {
uint32_t address = dchecked_integral_cast<uint32_t>(
reinterpret_cast<uintptr_t>(load->GetString().Get()));
DCHECK_NE(address, 0u);
__ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
- return; // No dex cache slow path.
+ return;
+ }
+ case HLoadString::LoadKind::kBootImageInternTable: {
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+ codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+ codegen_->EmitMovwMovtPlaceholder(labels, out);
+ __ Ldr(out, MemOperand(out, /* offset */ 0));
+ return;
}
case HLoadString::LoadKind::kBssEntry: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
@@ -7520,7 +7397,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
? RegisterFrom(locations->GetTemp(0))
: out;
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
- codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
+ codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
codegen_->EmitMovwMovtPlaceholder(labels, temp);
GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption);
LoadStringSlowPathARMVIXL* slow_path =
@@ -7528,6 +7405,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
codegen_->AddSlowPath(slow_path);
__ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 15);
return;
}
case HLoadString::LoadKind::kJitTableAddress: {
@@ -7548,6 +7426,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
__ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 16);
}
static int32_t GetExceptionTlsOffset() {
@@ -8146,6 +8025,7 @@ void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* i
} else {
CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
}
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 17);
}
void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
@@ -8647,6 +8527,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
// Note that GC roots are not affected by heap poisoning, thus we
// do not have to unpoison `root_reg` here.
}
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18);
}
void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
@@ -8711,31 +8592,34 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i
base.GetCode(), obj.GetCode(), narrow);
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
- vixl::EmissionCheckScope guard(
- GetVIXLAssembler(),
- (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
- vixl32::Label return_address;
- EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
- __ cmp(mr, Operand(0));
- EmitPlaceholderBne(this, bne_label);
- ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
- __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- // Note: We need a specific width for the unpoisoning NEG.
- if (kPoisonHeapReferences) {
- if (narrow) {
- // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
- __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
- } else {
- __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ {
+ vixl::EmissionCheckScope guard(
+ GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(mr, Operand(0));
+ EmitPlaceholderBne(this, bne_label);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
}
+ // Note: We need a specific width for the unpoisoning NEG.
+ if (kPoisonHeapReferences) {
+ if (narrow) {
+ // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+ __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+ } else {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
+ }
+ __ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
}
- __ Bind(&return_address);
- DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
- narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
- : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
+ MaybeGenerateMarkingRegisterCheck(/* code */ 19, /* temp_loc */ LocationFrom(ip));
return;
}
@@ -8796,23 +8680,26 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
__ Add(data_reg, obj, Operand(data_offset));
- vixl::EmissionCheckScope guard(
- GetVIXLAssembler(),
- (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
- vixl32::Label return_address;
- EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
- __ cmp(mr, Operand(0));
- EmitPlaceholderBne(this, bne_label);
- ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
- __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
- DCHECK(!needs_null_check); // The thunk cannot handle the null check.
- // Note: We need a Wide NEG for the unpoisoning.
- if (kPoisonHeapReferences) {
- __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ {
+ vixl::EmissionCheckScope guard(
+ GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(mr, Operand(0));
+ EmitPlaceholderBne(this, bne_label);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ // Note: We need a Wide NEG for the unpoisoning.
+ if (kPoisonHeapReferences) {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
+ }
+ __ Bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
}
- __ Bind(&return_address);
- DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
- BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+ MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip));
return;
}
@@ -8866,6 +8753,7 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio
// Fast path: the GC is not marking: just load the reference.
GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
__ Bind(slow_path->GetExitLabel());
+ MaybeGenerateMarkingRegisterCheck(/* code */ 21);
}
void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
@@ -8920,6 +8808,7 @@ void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction
// Fast path: the GC is not marking: nothing to do (the field is
// up-to-date, and we don't need to load the reference).
__ Bind(slow_path->GetExitLabel());
+ MaybeGenerateMarkingRegisterCheck(/* code */ 22);
}
void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
@@ -8981,6 +8870,20 @@ void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
}
+void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
+ // The following condition is a compile-time one, so it does not have a run-time cost.
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
+ // The following condition is a run-time one; it is executed after the
+ // previous compile-time test, to avoid penalizing non-debug builds.
+ if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire();
+ GetAssembler()->GenerateMarkingRegisterCheck(temp,
+ kMarkingRegisterCheckBreakCodeBaseCode + code);
+ }
+ }
+}
+
void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction,
Location out,
Location ref,
@@ -9226,6 +9129,11 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeSt
return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
}
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch(
+ const DexFile& dex_file, dex::StringIndex string_index) {
+ return NewPcRelativePatch(dex_file, string_index.index_, &string_bss_entry_patches_);
+}
+
CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
patches->emplace_back(dex_file, offset_or_index);
@@ -9294,6 +9202,7 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa
/* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
/* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
/* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
+ /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
baker_read_barrier_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
@@ -9306,13 +9215,15 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa
} else {
DCHECK(pc_relative_method_patches_.empty());
DCHECK(pc_relative_type_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_,
+ linker_patches);
}
EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
linker_patches);
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
+ linker_patches);
for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
info.custom_data));
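A hedged sketch of the Thumb encoding rule behind the GenerateLongTestConstant change above: inside an IT block on ARMv8 only a 16-bit CMP avoids the deprecated-encoding path, and that encoding requires a low register (r0-r7) plus an 8-bit immediate; when either condition fails, the constant is first moved to a scratch register so a 16-bit register-register CMP can be used. The predicate below is illustrative, not VIXL API:

#include <cstdint>

bool NarrowCmpImmediateFits(unsigned reg_code, uint32_t imm) {
  const bool low_register = reg_code < 8u;  // mirrors left_low.IsLow()
  const bool uint8_imm = imm <= 0xffu;      // mirrors IsUint<8>(value_low)
  return low_register && uint8_imm;
}

int main() {
  // r3 with #42 fits; r9 or #300 would force the scratch-register path.
  return (NarrowCmpImmediateFits(3u, 42u) &&
          !NarrowCmpImmediateFits(9u, 42u) &&
          !NarrowCmpImmediateFits(3u, 300u)) ? 0 : 1;
}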
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 7ab2993161..e78bc15614 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -579,6 +579,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
dex::StringIndex string_index);
+ PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file,
+ dex::StringIndex string_index);
// Add a new baker read barrier patch and return the label to be bound
// before the BNE instruction.
@@ -661,6 +663,28 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
ScaleFactor scale_factor,
bool needs_null_check);
+ // Emit code checking the status of the Marking Register, and
+ // aborting the program if MR does not match the value stored in the
+ // art::Thread object. Code is only emitted in debug mode and if
+ // CompilerOptions::EmitRunTimeChecksInDebugMode returns true.
+ //
+ // Argument `code` is used to identify the different occurrences of
+ // MaybeGenerateMarkingRegisterCheck in the code generator, and is
+ // used together with kMarkingRegisterCheckBreakCodeBaseCode to
+ // create the value passed to the BKPT instruction. Note that unlike
+ // in the ARM64 code generator, where `__LINE__` is passed as `code`
+ // argument to
+ // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck, we cannot
+ // realistically do that here, as Encoding T1 for the BKPT
+ // instruction only accepts 8-bit immediate values.
+ //
+ // If `temp_loc` is a valid location, it is expected to be a
+ // register and will be used as a temporary to generate code;
+ // otherwise, a temporary will be fetched from the core register
+ // scratch pool.
+ virtual void MaybeGenerateMarkingRegisterCheck(int code,
+ Location temp_loc = Location::NoLocation());
+
// Generate a read barrier for a heap reference within `instruction`
// using a slow path.
//
@@ -781,8 +805,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+ // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+ // PC-relative String patch info for kBssEntry.
+ ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
// Baker read barrier patch info.
ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
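A small sketch of the BKPT numbering scheme the header comment describes, using the base constant defined in code_generator_arm_vixl.cc (the example mapping itself is illustrative):

#include <cassert>
#include <cstdint>

constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;

uint8_t BkptImmediateFor(int code) {
  const int imm = kMarkingRegisterCheckBreakCodeBaseCode + code;
  assert(0 <= imm && imm <= 0xff);  // BKPT encoding T1: 8-bit immediate only
  return static_cast<uint8_t>(imm);
}

int main() {
  assert(BkptImmediateFor(1) == 0x11);   // code 1: GenerateFrameEntry
  assert(BkptImmediateFor(22) == 0x26);  // code 22: highest used in this change
  return 0;
}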
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index b6eb5c1d1d..ac8f675e2d 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -267,13 +267,10 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS {
DCHECK(bss_info_high_);
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index, bss_info_high_);
- bool reordering = __ SetReorder(false);
- __ Bind(&info_low->label);
- __ StoreToOffset(kStoreWord,
- calling_convention.GetRegisterAt(0),
- entry_address,
- /* placeholder */ 0x5678);
- __ SetReorder(reordering);
+ __ Sw(calling_convention.GetRegisterAt(0),
+ entry_address,
+ /* placeholder */ 0x5678,
+ &info_low->label);
}
// Move the class to the desired location.
@@ -296,10 +293,8 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS {
mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index, info_high);
- bool reordering = __ SetReorder(false);
- mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base, info_low);
- __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, /* placeholder */ 0x5678);
- __ SetReorder(reordering);
+ mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base);
+ __ Sw(out.AsRegister<Register>(), TMP, /* placeholder */ 0x5678, &info_low->label);
}
__ B(GetExitLabel());
}
@@ -365,14 +360,11 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
// The string entry address was preserved in `entry_address` thanks to kSaveEverything.
DCHECK(bss_info_high_);
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
- mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, bss_info_high_);
- bool reordering = __ SetReorder(false);
- __ Bind(&info_low->label);
- __ StoreToOffset(kStoreWord,
- calling_convention.GetRegisterAt(0),
- entry_address,
- /* placeholder */ 0x5678);
- __ SetReorder(reordering);
+ mips_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index, bss_info_high_);
+ __ Sw(calling_convention.GetRegisterAt(0),
+ entry_address,
+ /* placeholder */ 0x5678,
+ &info_low->label);
}
Primitive::Type type = instruction_->GetType();
@@ -388,13 +380,11 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6();
Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
- mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+ mips_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index);
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
- mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, info_high);
- bool reordering = __ SetReorder(false);
- mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base, info_low);
- __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
- __ SetReorder(reordering);
+ mips_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index, info_high);
+ mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base);
+ __ Sw(out, TMP, /* placeholder */ 0x5678, &info_low->label);
}
__ B(GetExitLabel());
}
@@ -1111,6 +1101,7 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
clobbered_ra_(false) {
@@ -1661,7 +1652,8 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch
method_bss_entry_patches_.size() +
pc_relative_type_patches_.size() +
type_bss_entry_patches_.size() +
- pc_relative_string_patches_.size();
+ pc_relative_string_patches_.size() +
+ string_bss_entry_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
@@ -1673,13 +1665,15 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch
} else {
DCHECK(pc_relative_method_patches_.empty());
DCHECK(pc_relative_type_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_,
+ linker_patches);
}
EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
linker_patches);
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
+ linker_patches);
DCHECK_EQ(size, linker_patches->size());
}
@@ -1722,6 +1716,13 @@ CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPa
return NewPcRelativePatch(dex_file, string_index.index_, info_high, &pc_relative_string_patches_);
}
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewStringBssEntryPatch(
+ const DexFile& dex_file,
+ dex::StringIndex string_index,
+ const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(dex_file, string_index.index_, info_high, &string_bss_entry_patches_);
+}
+
CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch(
const DexFile& dex_file,
uint32_t offset_or_index,
@@ -1743,16 +1744,17 @@ Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address)
void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high,
Register out,
- Register base,
- PcRelativePatchInfo* info_low) {
+ Register base) {
DCHECK(!info_high->patch_info_high);
DCHECK_NE(out, base);
+ bool reordering = __ SetReorder(false);
if (GetInstructionSetFeatures().IsR6()) {
DCHECK_EQ(base, ZERO);
__ Bind(&info_high->label);
__ Bind(&info_high->pc_rel_label);
// Add the high half of a 32-bit offset to PC.
__ Auipc(out, /* placeholder */ 0x1234);
+ __ SetReorder(reordering);
} else {
// If base is ZERO, emit NAL to obtain the actual base.
if (base == ZERO) {
@@ -1766,15 +1768,12 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo
if (base == ZERO) {
__ Bind(&info_high->pc_rel_label);
}
+ __ SetReorder(reordering);
// Add the high half of a 32-bit offset to PC.
__ Addu(out, out, (base == ZERO) ? RA : base);
}
// A following instruction will add the sign-extended low half of the 32-bit
// offset to `out` (e.g. lw, jialc, addiu).
- if (info_low != nullptr) {
- DCHECK_EQ(info_low->patch_info_high, info_high);
- __ Bind(&info_low->label);
- }
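+ // The low half of the offset is bound by the consuming instruction itself
+ // (e.g. the Sw/Lw/Addiu overloads that take `&info_low->label`), so
+ // `info_low` is no longer passed to this function.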
}
CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch(
@@ -6573,7 +6572,8 @@ void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruc
DCHECK(!label_low);
__ AddUpper(base, obj, offset_high);
}
- __ Beqz(T9, (isR6 ? 2 : 4)); // Skip jialc / addiu+jalr+nop.
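+ // The branch below skips the thunk call; /* is_bare */ true requests a
+ // branch with no assembler-managed delay slot or long-branch expansion, so
+ // the delay/forbidden slot can be scheduled explicitly (assumed semantics
+ // of the is_bare flag in the MIPS assembler).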
+ MipsLabel skip_call;
+ __ Beqz(T9, &skip_call, /* is_bare */ true);
if (label_low != nullptr) {
DCHECK(short_offset);
__ Bind(label_low);
@@ -6588,6 +6588,7 @@ void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruc
__ Jalr(T9);
__ Nop();
}
+ __ Bind(&skip_call);
__ SetReorder(reordering);
} else {
// Note that we do not actually check the value of `GetIsGcMarking()`
@@ -6724,27 +6725,31 @@ void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* inst
__ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
Register ref_reg = ref.AsRegister<Register>();
Register base = short_offset ? obj : TMP;
+ MipsLabel skip_call;
if (short_offset) {
if (isR6) {
- __ Beqzc(T9, 2); // Skip jialc.
+ __ Beqzc(T9, &skip_call, /* is_bare */ true);
__ Nop(); // In forbidden slot.
__ Jialc(T9, thunk_disp);
} else {
- __ Beqz(T9, 3); // Skip jalr+nop.
+ __ Beqz(T9, &skip_call, /* is_bare */ true);
__ Addiu(T9, T9, thunk_disp); // In delay slot.
__ Jalr(T9);
__ Nop(); // In delay slot.
}
+ __ Bind(&skip_call);
} else {
if (isR6) {
- __ Beqz(T9, 2); // Skip jialc.
+ __ Beqz(T9, &skip_call, /* is_bare */ true);
__ Aui(base, obj, offset_high); // In delay slot.
__ Jialc(T9, thunk_disp);
+ __ Bind(&skip_call);
} else {
__ Lui(base, offset_high);
- __ Beqz(T9, 2); // Skip jalr.
+ __ Beqz(T9, &skip_call, /* is_bare */ true);
__ Addiu(T9, T9, thunk_disp); // In delay slot.
__ Jalr(T9);
+ __ Bind(&skip_call);
__ Addu(base, base, obj); // In delay slot.
}
}
@@ -6826,15 +6831,18 @@ void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* inst
Register index_reg = index.IsRegisterPair()
? index.AsRegisterPairLow<Register>()
: index.AsRegister<Register>();
+ MipsLabel skip_call;
if (GetInstructionSetFeatures().IsR6()) {
- __ Beqz(T9, 2); // Skip jialc.
+ __ Beqz(T9, &skip_call, /* is_bare */ true);
__ Lsa(TMP, index_reg, obj, scale_factor); // In delay slot.
__ Jialc(T9, thunk_disp);
+ __ Bind(&skip_call);
} else {
__ Sll(TMP, index_reg, scale_factor);
- __ Beqz(T9, 2); // Skip jalr.
+ __ Beqz(T9, &skip_call, /* is_bare */ true);
__ Addiu(T9, T9, thunk_disp); // In delay slot.
__ Jalr(T9);
+ __ Bind(&skip_call);
__ Addu(TMP, TMP, obj); // In delay slot.
}
// /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor))
@@ -7368,6 +7376,7 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
bool fallback_load = has_irreducible_loops && !is_r6;
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBootImageInternTable:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -7506,11 +7515,9 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(
PcRelativePatchInfo* info_high = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
PcRelativePatchInfo* info_low =
NewPcRelativeMethodPatch(invoke->GetTargetMethod(), info_high);
- bool reordering = __ SetReorder(false);
Register temp_reg = temp.AsRegister<Register>();
- EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg, info_low);
- __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678);
- __ SetReorder(reordering);
+ EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg);
+ __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label);
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
@@ -7522,10 +7529,8 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(
PcRelativePatchInfo* info_low = NewMethodBssEntryPatch(
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()), info_high);
Register temp_reg = temp.AsRegister<Register>();
- bool reordering = __ SetReorder(false);
- EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg, info_low);
- __ Lw(temp_reg, TMP, /* placeholder */ 0x5678);
- __ SetReorder(reordering);
+ EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg);
+ __ Lw(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label);
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
@@ -7720,13 +7725,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high);
- bool reordering = __ SetReorder(false);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
out,
- base_or_current_method_reg,
- info_low);
- __ Addiu(out, out, /* placeholder */ 0x5678);
- __ SetReorder(reordering);
+ base_or_current_method_reg);
+ __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label);
break;
}
case HLoadClass::LoadKind::kBootImageAddress: {
@@ -7745,11 +7747,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex(), bss_info_high);
constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
Register temp = non_baker_read_barrier ? out : locations->GetTemp(0).AsRegister<Register>();
- bool reordering = __ SetReorder(false);
codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high,
temp,
base_or_current_method_reg);
- __ SetReorder(reordering);
GenerateGcRootFieldLoad(cls,
out_loc,
temp,
@@ -7829,6 +7829,7 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
// We need an extra register for PC-relative literals on R2.
case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBootImageInternTable:
case HLoadString::LoadKind::kBssEntry:
if (isR6) {
break;
@@ -7875,6 +7876,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
// We need an extra register for PC-relative literals on R2.
case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBootImageInternTable:
case HLoadString::LoadKind::kBssEntry:
base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
break;
@@ -7890,14 +7892,11 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
- bool reordering = __ SetReorder(false);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
out,
- base_or_current_method_reg,
- info_low);
- __ Addiu(out, out, /* placeholder */ 0x5678);
- __ SetReorder(reordering);
- return; // No dex cache slow path.
+ base_or_current_method_reg);
+ __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label);
+ return;
}
case HLoadString::LoadKind::kBootImageAddress: {
uint32_t address = dchecked_integral_cast<uint32_t>(
@@ -7906,21 +7905,31 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
__ LoadLiteral(out,
base_or_current_method_reg,
codegen_->DeduplicateBootImageAddressLiteral(address));
- return; // No dex cache slow path.
+ return;
}
- case HLoadString::LoadKind::kBssEntry: {
+ case HLoadString::LoadKind::kBootImageInternTable: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
+ out,
+ base_or_current_method_reg);
+ __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label);
+ return;
+ }
+ case HLoadString::LoadKind::kBssEntry: {
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
+ codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
+ CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
+ codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
Register temp = non_baker_read_barrier ? out : locations->GetTemp(0).AsRegister<Register>();
- bool reordering = __ SetReorder(false);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
temp,
base_or_current_method_reg);
- __ SetReorder(reordering);
GenerateGcRootFieldLoad(load,
out_loc,
temp,
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 7195b9d89d..f15f8c672a 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -633,12 +633,14 @@ class CodeGeneratorMIPS : public CodeGenerator {
PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
dex::StringIndex string_index,
const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ const PcRelativePatchInfo* info_high = nullptr);
Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high,
Register out,
- Register base,
- PcRelativePatchInfo* info_low = nullptr);
+ Register base);
// The JitPatchInfo is used for JIT string and class loads.
struct JitPatchInfo {
@@ -700,8 +702,10 @@ class CodeGeneratorMIPS : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+ // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+ // PC-relative String patch info for kBssEntry.
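+ // (These entries are resolved lazily at runtime through their .bss slots,
+ // while the pc_relative_string_patches_ above are patched by the linker.)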
+ ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
// Patches for string root accesses in JIT compiled code.
ArenaDeque<JitPatchInfo> jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 3e79f474b6..71c2bfff19 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -318,9 +318,9 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 {
// The string entry address was preserved in `entry_address` thanks to kSaveEverything.
DCHECK(bss_info_high_);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
- mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(),
- string_index,
- bss_info_high_);
+ mips64_codegen->NewStringBssEntryPatch(load->GetDexFile(),
+ string_index,
+ bss_info_high_);
__ Bind(&info_low->label);
__ StoreToOffset(kStoreWord,
calling_convention.GetRegisterAt(0),
@@ -339,9 +339,9 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 {
// For non-Baker read barriers we need to re-calculate the address of
// the string entry.
CodeGeneratorMIPS64::PcRelativePatchInfo* info_high =
- mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+ mips64_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
- mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, info_high);
+ mips64_codegen->NewStringBssEntryPatch(load->GetDexFile(), string_index, info_high);
mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, info_low);
__ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
}
@@ -1049,6 +1049,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -1560,7 +1561,8 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat
method_bss_entry_patches_.size() +
pc_relative_type_patches_.size() +
type_bss_entry_patches_.size() +
- pc_relative_string_patches_.size();
+ pc_relative_string_patches_.size() +
+ string_bss_entry_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
@@ -1572,13 +1574,15 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat
} else {
DCHECK(pc_relative_method_patches_.empty());
DCHECK(pc_relative_type_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_,
+ linker_patches);
}
EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
linker_patches);
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
+ linker_patches);
DCHECK_EQ(size, linker_patches->size());
}
@@ -1621,6 +1625,13 @@ CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStri
return NewPcRelativePatch(dex_file, string_index.index_, info_high, &pc_relative_string_patches_);
}
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewStringBssEntryPatch(
+ const DexFile& dex_file,
+ dex::StringIndex string_index,
+ const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(dex_file, string_index.index_, info_high, &string_bss_entry_patches_);
+}
+
CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativePatch(
const DexFile& dex_file,
uint32_t offset_or_index,
@@ -3652,6 +3663,114 @@ void InstructionCodeGeneratorMIPS64::GenerateIntLongCompare(IfCondition cond,
}
}
+bool InstructionCodeGeneratorMIPS64::MaterializeIntLongCompare(IfCondition cond,
+ bool is64bit,
+ LocationSummary* input_locations,
+ GpuRegister dst) {
+ GpuRegister lhs = input_locations->InAt(0).AsRegister<GpuRegister>();
+ Location rhs_location = input_locations->InAt(1);
+ GpuRegister rhs_reg = ZERO;
+ int64_t rhs_imm = 0;
+ bool use_imm = rhs_location.IsConstant();
+ if (use_imm) {
+ if (is64bit) {
+ rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant());
+ } else {
+ rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+ }
+ } else {
+ rhs_reg = rhs_location.AsRegister<GpuRegister>();
+ }
+ int64_t rhs_imm_plus_one = rhs_imm + UINT64_C(1);
+
+ switch (cond) {
+ case kCondEQ:
+ case kCondNE:
+ if (use_imm && IsInt<16>(-rhs_imm)) {
+ if (is64bit) {
+ __ Daddiu(dst, lhs, -rhs_imm);
+ } else {
+ __ Addiu(dst, lhs, -rhs_imm);
+ }
+ } else if (use_imm && IsUint<16>(rhs_imm)) {
+ __ Xori(dst, lhs, rhs_imm);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst64(rhs_reg, rhs_imm);
+ }
+ __ Xor(dst, lhs, rhs_reg);
+ }
+ return (cond == kCondEQ);
+
+ case kCondLT:
+ case kCondGE:
+ if (use_imm && IsInt<16>(rhs_imm)) {
+ __ Slti(dst, lhs, rhs_imm);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst64(rhs_reg, rhs_imm);
+ }
+ __ Slt(dst, lhs, rhs_reg);
+ }
+ return (cond == kCondGE);
+
+ case kCondLE:
+ case kCondGT:
+ if (use_imm && IsInt<16>(rhs_imm_plus_one)) {
+ // Simulate lhs <= rhs via lhs < rhs + 1.
+ __ Slti(dst, lhs, rhs_imm_plus_one);
+ return (cond == kCondGT);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst64(rhs_reg, rhs_imm);
+ }
+ __ Slt(dst, rhs_reg, lhs);
+ return (cond == kCondLE);
+ }
+
+ case kCondB:
+ case kCondAE:
+ if (use_imm && IsInt<16>(rhs_imm)) {
+ // Sltiu sign-extends its 16-bit immediate operand before
+ // the comparison and thus lets us compare directly with
+ // unsigned values in the ranges [0, 0x7fff] and
+ // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff].
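+ // E.g. rhs_imm == -1 sign-extends to 0xffffffff (0xffffffffffffffff for
+ // is64bit), so a single Sltiu can compare against the maximal unsigned
+ // value.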
+ __ Sltiu(dst, lhs, rhs_imm);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst64(rhs_reg, rhs_imm);
+ }
+ __ Sltu(dst, lhs, rhs_reg);
+ }
+ return (cond == kCondAE);
+
+ case kCondBE:
+ case kCondA:
+ if (use_imm && (rhs_imm_plus_one != 0) && IsInt<16>(rhs_imm_plus_one)) {
+ // Simulate lhs <= rhs via lhs < rhs + 1.
+ // Note that this only works if rhs + 1 does not overflow
+ // to 0, hence the check above.
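+ // E.g. rhs_imm == -1: lhs <= 0xff...ff always holds, while lhs < 0 never
+ // does, so the register path below is used instead.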
+ // Sltiu sign-extends its 16-bit immediate operand before
+ // the comparison and thus lets us compare directly with
+ // unsigned values in the ranges [0, 0x7fff] and
+ // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff].
+ __ Sltiu(dst, lhs, rhs_imm_plus_one);
+ return (cond == kCondA);
+ } else {
+ if (use_imm) {
+ rhs_reg = TMP;
+ __ LoadConst64(rhs_reg, rhs_imm);
+ }
+ __ Sltu(dst, rhs_reg, lhs);
+ return (cond == kCondBE);
+ }
+ }
+}
+
void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition cond,
bool is64bit,
LocationSummary* locations,
@@ -3854,6 +3973,97 @@ void InstructionCodeGeneratorMIPS64::GenerateFpCompare(IfCondition cond,
}
}
+bool InstructionCodeGeneratorMIPS64::MaterializeFpCompare(IfCondition cond,
+ bool gt_bias,
+ Primitive::Type type,
+ LocationSummary* input_locations,
+ FpuRegister dst) {
+ FpuRegister lhs = input_locations->InAt(0).AsFpuRegister<FpuRegister>();
+ FpuRegister rhs = input_locations->InAt(1).AsFpuRegister<FpuRegister>();
+ if (type == Primitive::kPrimFloat) {
+ switch (cond) {
+ case kCondEQ:
+ __ CmpEqS(dst, lhs, rhs);
+ return false;
+ case kCondNE:
+ __ CmpEqS(dst, lhs, rhs);
+ return true;
+ case kCondLT:
+ if (gt_bias) {
+ __ CmpLtS(dst, lhs, rhs);
+ } else {
+ __ CmpUltS(dst, lhs, rhs);
+ }
+ return false;
+ case kCondLE:
+ if (gt_bias) {
+ __ CmpLeS(dst, lhs, rhs);
+ } else {
+ __ CmpUleS(dst, lhs, rhs);
+ }
+ return false;
+ case kCondGT:
+ if (gt_bias) {
+ __ CmpUltS(dst, rhs, lhs);
+ } else {
+ __ CmpLtS(dst, rhs, lhs);
+ }
+ return false;
+ case kCondGE:
+ if (gt_bias) {
+ __ CmpUleS(dst, rhs, lhs);
+ } else {
+ __ CmpLeS(dst, rhs, lhs);
+ }
+ return false;
+ default:
+ LOG(FATAL) << "Unexpected non-floating-point condition " << cond;
+ UNREACHABLE();
+ }
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ switch (cond) {
+ case kCondEQ:
+ __ CmpEqD(dst, lhs, rhs);
+ return false;
+ case kCondNE:
+ __ CmpEqD(dst, lhs, rhs);
+ return true;
+ case kCondLT:
+ if (gt_bias) {
+ __ CmpLtD(dst, lhs, rhs);
+ } else {
+ __ CmpUltD(dst, lhs, rhs);
+ }
+ return false;
+ case kCondLE:
+ if (gt_bias) {
+ __ CmpLeD(dst, lhs, rhs);
+ } else {
+ __ CmpUleD(dst, lhs, rhs);
+ }
+ return false;
+ case kCondGT:
+ if (gt_bias) {
+ __ CmpUltD(dst, rhs, lhs);
+ } else {
+ __ CmpLtD(dst, rhs, lhs);
+ }
+ return false;
+ case kCondGE:
+ if (gt_bias) {
+ __ CmpUleD(dst, rhs, lhs);
+ } else {
+ __ CmpLeD(dst, rhs, lhs);
+ }
+ return false;
+ default:
+ LOG(FATAL) << "Unexpected non-floating-point condition " << cond;
+ UNREACHABLE();
+ }
+ }
+}
+
void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond,
bool gt_bias,
Primitive::Type type,
@@ -3905,6 +4115,7 @@ void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond
break;
default:
LOG(FATAL) << "Unexpected non-floating-point condition";
+ UNREACHABLE();
}
} else {
DCHECK_EQ(type, Primitive::kPrimDouble);
@@ -3951,6 +4162,7 @@ void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond
break;
default:
LOG(FATAL) << "Unexpected non-floating-point condition";
+ UNREACHABLE();
}
}
}
@@ -4069,6 +4281,306 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
/* false_target */ nullptr);
}
+// This function returns true if a conditional move can be generated for HSelect.
+// Otherwise it returns false and HSelect must be implemented in terms of conditional
+// branches and regular moves.
+//
+// If `locations_to_set` isn't nullptr, its inputs and outputs are set for HSelect.
+//
+// While determining feasibility of a conditional move and setting inputs/outputs
+// are two distinct tasks, this function does both because they share quite a bit
+// of common logic.
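+//
+// Usage sketch: the locations builder calls this with the HSelect's
+// LocationSummary to set up operands, while the code generator calls it
+// again with nullptr purely as a feasibility check (see VisitSelect below).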
+static bool CanMoveConditionally(HSelect* select, LocationSummary* locations_to_set) {
+ bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition());
+ HInstruction* cond = select->InputAt(/* condition_input_index */ 2);
+ HCondition* condition = cond->AsCondition();
+
+ Primitive::Type cond_type = materialized ? Primitive::kPrimInt : condition->InputAt(0)->GetType();
+ Primitive::Type dst_type = select->GetType();
+
+ HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
+ HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
+ bool is_true_value_zero_constant =
+ (cst_true_value != nullptr && cst_true_value->IsZeroBitPattern());
+ bool is_false_value_zero_constant =
+ (cst_false_value != nullptr && cst_false_value->IsZeroBitPattern());
+
+ bool can_move_conditionally = false;
+ bool use_const_for_false_in = false;
+ bool use_const_for_true_in = false;
+
+ if (!cond->IsConstant()) {
+ if (!Primitive::IsFloatingPointType(cond_type)) {
+ if (!Primitive::IsFloatingPointType(dst_type)) {
+ // Moving int/long on int/long condition.
+ if (is_true_value_zero_constant) {
+ // seleqz out_reg, false_reg, cond_reg
+ can_move_conditionally = true;
+ use_const_for_true_in = true;
+ } else if (is_false_value_zero_constant) {
+ // selnez out_reg, true_reg, cond_reg
+ can_move_conditionally = true;
+ use_const_for_false_in = true;
+ } else if (materialized) {
+ // Not materializing unmaterialized int conditions
+ // to keep the instruction count low.
+ // selnez AT, true_reg, cond_reg
+ // seleqz TMP, false_reg, cond_reg
+ // or out_reg, AT, TMP
+ can_move_conditionally = true;
+ }
+ } else {
+ // Moving float/double on int/long condition.
+ if (materialized) {
+ // Not materializing unmaterialized int conditions
+ // to keep the instruction count low.
+ can_move_conditionally = true;
+ if (is_true_value_zero_constant) {
+ // sltu TMP, ZERO, cond_reg
+ // mtc1 TMP, temp_cond_reg
+ // seleqz.fmt out_reg, false_reg, temp_cond_reg
+ use_const_for_true_in = true;
+ } else if (is_false_value_zero_constant) {
+ // sltu TMP, ZERO, cond_reg
+ // mtc1 TMP, temp_cond_reg
+ // selnez.fmt out_reg, true_reg, temp_cond_reg
+ use_const_for_false_in = true;
+ } else {
+ // sltu TMP, ZERO, cond_reg
+ // mtc1 TMP, temp_cond_reg
+ // sel.fmt temp_cond_reg, false_reg, true_reg
+ // mov.fmt out_reg, temp_cond_reg
+ }
+ }
+ }
+ } else {
+ if (!Primitive::IsFloatingPointType(dst_type)) {
+ // Moving int/long on float/double condition.
+ can_move_conditionally = true;
+ if (is_true_value_zero_constant) {
+ // mfc1 TMP, temp_cond_reg
+ // seleqz out_reg, false_reg, TMP
+ use_const_for_true_in = true;
+ } else if (is_false_value_zero_constant) {
+ // mfc1 TMP, temp_cond_reg
+ // selnez out_reg, true_reg, TMP
+ use_const_for_false_in = true;
+ } else {
+ // mfc1 TMP, temp_cond_reg
+ // selnez AT, true_reg, TMP
+ // seleqz TMP, false_reg, TMP
+ // or out_reg, AT, TMP
+ }
+ } else {
+ // Moving float/double on float/double condition.
+ can_move_conditionally = true;
+ if (is_true_value_zero_constant) {
+ // seleqz.fmt out_reg, false_reg, temp_cond_reg
+ use_const_for_true_in = true;
+ } else if (is_false_value_zero_constant) {
+ // selnez.fmt out_reg, true_reg, temp_cond_reg
+ use_const_for_false_in = true;
+ } else {
+ // sel.fmt temp_cond_reg, false_reg, true_reg
+ // mov.fmt out_reg, temp_cond_reg
+ }
+ }
+ }
+ }
+
+ if (can_move_conditionally) {
+ DCHECK(!use_const_for_false_in || !use_const_for_true_in);
+ } else {
+ DCHECK(!use_const_for_false_in);
+ DCHECK(!use_const_for_true_in);
+ }
+
+ if (locations_to_set != nullptr) {
+ if (use_const_for_false_in) {
+ locations_to_set->SetInAt(0, Location::ConstantLocation(cst_false_value));
+ } else {
+ locations_to_set->SetInAt(0,
+ Primitive::IsFloatingPointType(dst_type)
+ ? Location::RequiresFpuRegister()
+ : Location::RequiresRegister());
+ }
+ if (use_const_for_true_in) {
+ locations_to_set->SetInAt(1, Location::ConstantLocation(cst_true_value));
+ } else {
+ locations_to_set->SetInAt(1,
+ Primitive::IsFloatingPointType(dst_type)
+ ? Location::RequiresFpuRegister()
+ : Location::RequiresRegister());
+ }
+ if (materialized) {
+ locations_to_set->SetInAt(2, Location::RequiresRegister());
+ }
+
+ if (can_move_conditionally) {
+ locations_to_set->SetOut(Primitive::IsFloatingPointType(dst_type)
+ ? Location::RequiresFpuRegister()
+ : Location::RequiresRegister());
+ } else {
+ locations_to_set->SetOut(Location::SameAsFirstInput());
+ }
+ }
+
+ return can_move_conditionally;
+}
+
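+// Illustrative lowering (register names assumed): an integer select with a
+// materialized condition in `c`, i.e. dst = c ? t : f, becomes
+//   selnez AT, t, c    // AT = (c != 0) ? t : 0
+//   seleqz TMP, f, c   // TMP = (c == 0) ? f : 0
+//   or dst, AT, TMP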
+void InstructionCodeGeneratorMIPS64::GenConditionalMove(HSelect* select) {
+ LocationSummary* locations = select->GetLocations();
+ Location dst = locations->Out();
+ Location false_src = locations->InAt(0);
+ Location true_src = locations->InAt(1);
+ HInstruction* cond = select->InputAt(/* condition_input_index */ 2);
+ GpuRegister cond_reg = TMP;
+ FpuRegister fcond_reg = FTMP;
+ Primitive::Type cond_type = Primitive::kPrimInt;
+ bool cond_inverted = false;
+ Primitive::Type dst_type = select->GetType();
+
+ if (IsBooleanValueOrMaterializedCondition(cond)) {
+ cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<GpuRegister>();
+ } else {
+ HCondition* condition = cond->AsCondition();
+ LocationSummary* cond_locations = cond->GetLocations();
+ IfCondition if_cond = condition->GetCondition();
+ cond_type = condition->InputAt(0)->GetType();
+ switch (cond_type) {
+ default:
+ cond_inverted = MaterializeIntLongCompare(if_cond,
+ /* is64bit */ false,
+ cond_locations,
+ cond_reg);
+ break;
+ case Primitive::kPrimLong:
+ cond_inverted = MaterializeIntLongCompare(if_cond,
+ /* is64bit */ true,
+ cond_locations,
+ cond_reg);
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ cond_inverted = MaterializeFpCompare(if_cond,
+ condition->IsGtBias(),
+ cond_type,
+ cond_locations,
+ fcond_reg);
+ break;
+ }
+ }
+
+ if (true_src.IsConstant()) {
+ DCHECK(true_src.GetConstant()->IsZeroBitPattern());
+ }
+ if (false_src.IsConstant()) {
+ DCHECK(false_src.GetConstant()->IsZeroBitPattern());
+ }
+
+ switch (dst_type) {
+ default:
+ if (Primitive::IsFloatingPointType(cond_type)) {
+ __ Mfc1(cond_reg, fcond_reg);
+ }
+ if (true_src.IsConstant()) {
+ if (cond_inverted) {
+ __ Selnez(dst.AsRegister<GpuRegister>(), false_src.AsRegister<GpuRegister>(), cond_reg);
+ } else {
+ __ Seleqz(dst.AsRegister<GpuRegister>(), false_src.AsRegister<GpuRegister>(), cond_reg);
+ }
+ } else if (false_src.IsConstant()) {
+ if (cond_inverted) {
+ __ Seleqz(dst.AsRegister<GpuRegister>(), true_src.AsRegister<GpuRegister>(), cond_reg);
+ } else {
+ __ Selnez(dst.AsRegister<GpuRegister>(), true_src.AsRegister<GpuRegister>(), cond_reg);
+ }
+ } else {
+ DCHECK_NE(cond_reg, AT);
+ if (cond_inverted) {
+ __ Seleqz(AT, true_src.AsRegister<GpuRegister>(), cond_reg);
+ __ Selnez(TMP, false_src.AsRegister<GpuRegister>(), cond_reg);
+ } else {
+ __ Selnez(AT, true_src.AsRegister<GpuRegister>(), cond_reg);
+ __ Seleqz(TMP, false_src.AsRegister<GpuRegister>(), cond_reg);
+ }
+ __ Or(dst.AsRegister<GpuRegister>(), AT, TMP);
+ }
+ break;
+ case Primitive::kPrimFloat: {
+ if (!Primitive::IsFloatingPointType(cond_type)) {
+ // sel*.fmt tests bit 0 of the condition register, account for that.
+ __ Sltu(TMP, ZERO, cond_reg);
+ __ Mtc1(TMP, fcond_reg);
+ }
+ FpuRegister dst_reg = dst.AsFpuRegister<FpuRegister>();
+ if (true_src.IsConstant()) {
+ FpuRegister src_reg = false_src.AsFpuRegister<FpuRegister>();
+ if (cond_inverted) {
+ __ SelnezS(dst_reg, src_reg, fcond_reg);
+ } else {
+ __ SeleqzS(dst_reg, src_reg, fcond_reg);
+ }
+ } else if (false_src.IsConstant()) {
+ FpuRegister src_reg = true_src.AsFpuRegister<FpuRegister>();
+ if (cond_inverted) {
+ __ SeleqzS(dst_reg, src_reg, fcond_reg);
+ } else {
+ __ SelnezS(dst_reg, src_reg, fcond_reg);
+ }
+ } else {
+ if (cond_inverted) {
+ __ SelS(fcond_reg,
+ true_src.AsFpuRegister<FpuRegister>(),
+ false_src.AsFpuRegister<FpuRegister>());
+ } else {
+ __ SelS(fcond_reg,
+ false_src.AsFpuRegister<FpuRegister>(),
+ true_src.AsFpuRegister<FpuRegister>());
+ }
+ __ MovS(dst_reg, fcond_reg);
+ }
+ break;
+ }
+ case Primitive::kPrimDouble: {
+ if (!Primitive::IsFloatingPointType(cond_type)) {
+ // sel*.fmt tests bit 0 of the condition register, account for that.
+ __ Sltu(TMP, ZERO, cond_reg);
+ __ Mtc1(TMP, fcond_reg);
+ }
+ FpuRegister dst_reg = dst.AsFpuRegister<FpuRegister>();
+ if (true_src.IsConstant()) {
+ FpuRegister src_reg = false_src.AsFpuRegister<FpuRegister>();
+ if (cond_inverted) {
+ __ SelnezD(dst_reg, src_reg, fcond_reg);
+ } else {
+ __ SeleqzD(dst_reg, src_reg, fcond_reg);
+ }
+ } else if (false_src.IsConstant()) {
+ FpuRegister src_reg = true_src.AsFpuRegister<FpuRegister>();
+ if (cond_inverted) {
+ __ SeleqzD(dst_reg, src_reg, fcond_reg);
+ } else {
+ __ SelnezD(dst_reg, src_reg, fcond_reg);
+ }
+ } else {
+ if (cond_inverted) {
+ __ SelD(fcond_reg,
+ true_src.AsFpuRegister<FpuRegister>(),
+ false_src.AsFpuRegister<FpuRegister>());
+ } else {
+ __ SelD(fcond_reg,
+ false_src.AsFpuRegister<FpuRegister>(),
+ true_src.AsFpuRegister<FpuRegister>());
+ }
+ __ MovD(dst_reg, fcond_reg);
+ }
+ break;
+ }
+ }
+}
+
void LocationsBuilderMIPS64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(flag, LocationSummary::kNoCall);
@@ -4084,28 +4596,22 @@ void InstructionCodeGeneratorMIPS64::VisitShouldDeoptimizeFlag(HShouldDeoptimize
void LocationsBuilderMIPS64::VisitSelect(HSelect* select) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
- if (Primitive::IsFloatingPointType(select->GetType())) {
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- } else {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- }
- if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
- locations->SetInAt(2, Location::RequiresRegister());
- }
- locations->SetOut(Location::SameAsFirstInput());
+ CanMoveConditionally(select, locations);
}
void InstructionCodeGeneratorMIPS64::VisitSelect(HSelect* select) {
- LocationSummary* locations = select->GetLocations();
- Mips64Label false_target;
- GenerateTestAndBranch(select,
- /* condition_input_index */ 2,
- /* true_target */ nullptr,
- &false_target);
- codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
- __ Bind(&false_target);
+ if (CanMoveConditionally(select, /* locations_to_set */ nullptr)) {
+ GenConditionalMove(select);
+ } else {
+ LocationSummary* locations = select->GetLocations();
+ Mips64Label false_target;
+ GenerateTestAndBranch(select,
+ /* condition_input_index */ 2,
+ /* true_target */ nullptr,
+ &false_target);
+ codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+ __ Bind(&false_target);
+ }
}
void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -4490,7 +4996,8 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(HInstruction* instr
DCHECK(!label_low);
__ Daui(base, obj, offset_high);
}
- __ Beqz(T9, 2); // Skip jialc.
+ Mips64Label skip_call;
+ __ Beqz(T9, &skip_call, /* is_bare */ true);
if (label_low != nullptr) {
DCHECK(short_offset);
__ Bind(label_low);
@@ -4499,6 +5006,7 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(HInstruction* instr
__ LoadFromOffset(kLoadUnsignedWord, root_reg, base, offset_low); // Single instruction
// in delay slot.
__ Jialc(T9, thunk_disp);
+ __ Bind(&skip_call);
} else {
// Note that we do not actually check the value of `GetIsGcMarking()`
// to decide whether to mark the loaded GC root or not. Instead, we
@@ -4617,18 +5125,21 @@ void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in
// threads are suspended or running a checkpoint.
__ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
GpuRegister ref_reg = ref.AsRegister<GpuRegister>();
+ Mips64Label skip_call;
if (short_offset) {
- __ Beqzc(T9, 2); // Skip jialc.
+ __ Beqzc(T9, &skip_call, /* is_bare */ true);
__ Nop(); // In forbidden slot.
__ Jialc(T9, thunk_disp);
+ __ Bind(&skip_call);
// /* HeapReference<Object> */ ref = *(obj + offset)
__ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset); // Single instruction.
} else {
int16_t offset_low = Low16Bits(offset);
int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lwu.
- __ Beqz(T9, 2); // Skip jialc.
+ __ Beqz(T9, &skip_call, /* is_bare */ true);
__ Daui(TMP, obj, offset_high); // In delay slot.
__ Jialc(T9, thunk_disp);
+ __ Bind(&skip_call);
// /* HeapReference<Object> */ ref = *(obj + offset)
__ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset_low); // Single instruction.
}
@@ -4702,11 +5213,13 @@ void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
- __ Beqz(T9, 2); // Skip jialc.
+ Mips64Label skip_call;
+ __ Beqz(T9, &skip_call, /* is_bare */ true);
GpuRegister ref_reg = ref.AsRegister<GpuRegister>();
GpuRegister index_reg = index.AsRegister<GpuRegister>();
__ Dlsa(TMP, index_reg, obj, scale_factor); // In delay slot.
__ Jialc(T9, thunk_disp);
+ __ Bind(&skip_call);
// /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor))
DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))) << data_offset;
__ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, data_offset); // Single instruction.
@@ -5227,6 +5740,7 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
bool fallback_load = false;
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBootImageInternTable:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -5615,7 +6129,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
__ Daddiu(out, AT, /* placeholder */ 0x5678);
- return; // No dex cache slow path.
+ return;
}
case HLoadString::LoadKind::kBootImageAddress: {
uint32_t address = dchecked_integral_cast<uint32_t>(
@@ -5624,14 +6138,24 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
__ LoadLiteral(out,
kLoadUnsignedWord,
codegen_->DeduplicateBootImageAddressLiteral(address));
- return; // No dex cache slow path.
+ return;
}
- case HLoadString::LoadKind::kBssEntry: {
+ case HLoadString::LoadKind::kBootImageInternTable: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS64::PcRelativePatchInfo* info_high =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
+ __ Lwu(out, AT, /* placeholder */ 0x5678);
+ return;
+ }
+ case HLoadString::LoadKind::kBssEntry: {
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ CodeGeneratorMIPS64::PcRelativePatchInfo* info_high =
+ codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
+ CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
+ codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier;
GpuRegister temp = non_baker_read_barrier
? out
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index d03a9eabd4..3035621972 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -293,6 +293,13 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void GenerateIntLongCompare(IfCondition cond, bool is64bit, LocationSummary* locations);
+ // When the function returns `false`, it means that the condition holds if `dst` is non-zero
+ // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero
+ // `dst` are exchanged.
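+ // For example, kCondEQ is materialized as Xor(dst, lhs, rhs) and returns
+ // `true`: `dst` is zero exactly when the condition holds.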
+ bool MaterializeIntLongCompare(IfCondition cond,
+ bool is64bit,
+ LocationSummary* input_locations,
+ GpuRegister dst);
void GenerateIntLongCompareAndBranch(IfCondition cond,
bool is64bit,
LocationSummary* locations,
@@ -301,6 +308,14 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
bool gt_bias,
Primitive::Type type,
LocationSummary* locations);
+ // When the function returns `false`, it means that the condition holds if `dst` is non-zero
+ // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero
+ // `dst` are exchanged.
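+ // For example, kCondNE is materialized as CmpEqS/CmpEqD (all ones in `dst`
+ // when the operands are equal) and returns `true`.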
+ bool MaterializeFpCompare(IfCondition cond,
+ bool gt_bias,
+ Primitive::Type type,
+ LocationSummary* input_locations,
+ FpuRegister dst);
void GenerateFpCompareAndBranch(IfCondition cond,
bool gt_bias,
Primitive::Type type,
@@ -320,6 +335,7 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
int32_t VecAddress(LocationSummary* locations,
size_t size,
/* out */ GpuRegister* adjusted_base);
+ void GenConditionalMove(HSelect* select);
Mips64Assembler* const assembler_;
CodeGeneratorMIPS64* const codegen_;
@@ -589,6 +605,9 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
dex::StringIndex string_index,
const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ const PcRelativePatchInfo* info_high = nullptr);
Literal* DeduplicateBootImageAddressLiteral(uint64_t address);
void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high,
@@ -650,8 +669,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+ // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+ // PC-relative String patch info for kBssEntry.
+ ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
// Patches for string root accesses in JIT compiled code.
StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index f422b9fc8b..18a55c8b09 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -15,7 +15,9 @@
*/
#include "code_generator_arm64.h"
+
#include "mirror/array-inl.h"
+#include "mirror/string.h"
using namespace vixl::aarch64; // NOLINT(build/namespaces)
@@ -25,12 +27,13 @@ namespace arm64 {
using helpers::ARM64EncodableConstantOrRegister;
using helpers::Arm64CanEncodeConstantAsImmediate;
using helpers::DRegisterFrom;
-using helpers::VRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64ConstantFrom;
-using helpers::XRegisterFrom;
+using helpers::OutputRegister;
+using helpers::VRegisterFrom;
using helpers::WRegisterFrom;
+using helpers::XRegisterFrom;
#define __ GetVIXLAssembler()->
@@ -125,20 +128,51 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar*
}
}
-void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void LocationsBuilderARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
-void InstructionCodeGeneratorARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Umov(OutputRegister(instruction), src.V4S(), 0);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Umov(OutputRegister(instruction), src.V2D(), 0);
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 4u);
+ DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector unary operations.
@@ -167,6 +201,46 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
}
}
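+// A horizontal reduction folds all lanes into element 0 of the destination:
+// ADDV sums the lanes and SMINV/SMAXV take the signed min/max across them;
+// the scalar result is typically read back via HVecExtractScalar.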
+void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister src = VRegisterFrom(locations->InAt(0));
+ VRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ Addv(dst.S(), src.V4S());
+ break;
+ case HVecReduce::kMin:
+ __ Sminv(dst.S(), src.V4S());
+ break;
+ case HVecReduce::kMax:
+ __ Smaxv(dst.S(), src.V4S());
+ break;
+ }
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ Addp(dst.D(), src.V2D());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD min/max";
+ UNREACHABLE();
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -261,6 +335,7 @@ void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
}
}
@@ -803,6 +878,77 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
}
}
+void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
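+// With the single-input form currently supported, HVecSetScalars reduces to
+// inserting the scalar into lane 0 of a zeroed vector, as below.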
+void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister dst = VRegisterFrom(locations->Out());
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ // Zero out all other elements first.
+ __ Movi(dst.V16B(), 0);
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ return;
+ }
+
+ // Set required elements.
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
switch (instr->GetPackedType()) {
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 527691d9d9..7a11dff41e 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -73,19 +73,11 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScala
}
}
-void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+void LocationsBuilderARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void LocationsBuilderARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
+void InstructionCodeGeneratorARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
@@ -112,6 +104,14 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
}
}
+void LocationsBuilderARMVIXL::VisitVecReduce(HVecReduce* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -621,6 +621,14 @@ void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) {
}
}
+void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index ea36e90112..c2fbf7f04b 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -88,19 +88,11 @@ void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar*
}
}
-void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void LocationsBuilderMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
+void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
+void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
@@ -133,6 +125,14 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
}
}
+void LocationsBuilderMIPS::VisitVecReduce(HVecReduce* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -818,12 +818,83 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) {
}
}
+void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+ switch (instr->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
+ DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
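+// MSA MADDV/MSUBV compute acc += left * right and acc -= left * right,
+// element-wise at the lane width selected below.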
void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+ LocationSummary* locations = instr->GetLocations();
+ VectorRegister acc =
+ VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
+ VectorRegister left =
+ VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
+ VectorRegister right =
+ VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
+ switch (instr->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvB(acc, left, right);
+ } else {
+ __ MsubvB(acc, left, right);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvH(acc, left, right);
+ } else {
+ __ MsubvH(acc, left, right);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvW(acc, left, right);
+ } else {
+ __ MsubvW(acc, left, right);
+ }
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvD(acc, left, right);
+ } else {
+ __ MsubvD(acc, left, right);
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 0395db1df9..9d3a777c13 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -91,19 +91,11 @@ void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar
}
}
-void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void LocationsBuilderMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
+void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void InstructionCodeGeneratorMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
+void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
@@ -136,6 +128,14 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
}
}
+void LocationsBuilderMIPS64::VisitVecReduce(HVecReduce* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -822,12 +822,83 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) {
}
}
+void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+ switch (instr->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
+ DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+ LocationSummary* locations = instr->GetLocations();
+ VectorRegister acc =
+ VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
+ VectorRegister left =
+ VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
+ VectorRegister right =
+ VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
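+ // MSA multiply-accumulate: acc = acc +/- (left * right), applied lane-wise per packed type.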
+ switch (instr->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvB(acc, left, right);
+ } else {
+ __ MsubvB(acc, left, right);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvH(acc, left, right);
+ } else {
+ __ MsubvH(acc, left, right);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvW(acc, left, right);
+ } else {
+ __ MsubvW(acc, left, right);
+ }
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvD(acc, left, right);
+ } else {
+ __ MsubvD(acc, left, right);
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 14782d70a1..37190f8363 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -15,7 +15,9 @@
*/
#include "code_generator_x86.h"
+
#include "mirror/array-inl.h"
+#include "mirror/string.h"
namespace art {
namespace x86 {
@@ -25,23 +27,31 @@ namespace x86 {
void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
switch (instruction->GetPackedType()) {
case Primitive::kPrimLong:
- // Long needs extra temporary to load the register pair.
- locations->AddTemp(Location::RequiresFpuRegister());
+ // Long needs extra temporary to load from the register pair.
+ if (!is_zero) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
FALLTHROUGH_INTENDED;
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
- locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(is_zero ? Location::RequiresFpuRegister()
+ : Location::SameAsFirstInput());
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -51,46 +61,53 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi
void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = instruction->GetLocations();
- XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ __ xorps(dst, dst);
+ return;
+ }
+
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<Register>());
- __ punpcklbw(reg, reg);
- __ punpcklwd(reg, reg);
- __ pshufd(reg, reg, Immediate(0));
+ __ movd(dst, locations->InAt(0).AsRegister<Register>());
+ __ punpcklbw(dst, dst);
+ __ punpcklwd(dst, dst);
+ __ pshufd(dst, dst, Immediate(0));
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
DCHECK_EQ(8u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<Register>());
- __ punpcklwd(reg, reg);
- __ pshufd(reg, reg, Immediate(0));
+ __ movd(dst, locations->InAt(0).AsRegister<Register>());
+ __ punpcklwd(dst, dst);
+ __ pshufd(dst, dst, Immediate(0));
break;
case Primitive::kPrimInt:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<Register>());
- __ pshufd(reg, reg, Immediate(0));
+ __ movd(dst, locations->InAt(0).AsRegister<Register>());
+ __ pshufd(dst, dst, Immediate(0));
break;
case Primitive::kPrimLong: {
XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
DCHECK_EQ(2u, instruction->GetVectorLength());
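+ // Assemble the 64-bit value in the low lane from the register pair, then duplicate it.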
- __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>());
+ __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
__ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
- __ punpckldq(reg, tmp);
- __ punpcklqdq(reg, reg);
+ __ punpckldq(dst, tmp);
+ __ punpcklqdq(dst, dst);
break;
}
case Primitive::kPrimFloat:
DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ shufps(reg, reg, Immediate(0));
+ __ shufps(dst, dst, Immediate(0));
break;
case Primitive::kPrimDouble:
DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ shufpd(reg, reg, Immediate(0));
+ __ shufpd(dst, dst, Immediate(0));
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -98,20 +115,65 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i
}
}
-void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimLong:
+ // Long needs extra temporary to store into the register pair.
+ locations->AddTemp(Location::RequiresFpuRegister());
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
-void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: // TODO: extraction of the narrow packed types is not implemented yet.
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ case Primitive::kPrimInt:
+ DCHECK_LE(4u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ movd(locations->Out().AsRegister<Register>(), src);
+ break;
+ case Primitive::kPrimLong: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ DCHECK_EQ(2u, instruction->GetVectorLength());
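+ // Move the low word out directly, then shuffle the high word into lane 0 to extract it.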
+ __ movd(locations->Out().AsRegisterPairLow<Register>(), src);
+ __ pshufd(tmp, src, Immediate(1));
+ __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 4u);
+ DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector unary operations.
@@ -135,6 +197,73 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
}
}
+void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+ // Long reduction or min/max require a temporary.
+ if (instruction->GetPackedType() == Primitive::kPrimLong ||
+ instruction->GetKind() == HVecReduce::kMin ||
+ instruction->GetKind() == HVecReduce::kMax) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
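+ // Two horizontal adds fold all four lanes into lane 0.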
+ __ movaps(dst, src);
+ __ phaddd(dst, dst);
+ __ phaddd(dst, dst);
+ break;
+ case HVecReduce::kMin: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ psrldq(tmp, Immediate(8));
+ __ pminsd(dst, tmp);
+ // Fold again from the partial result; reusing the stale copy of src would drop lane 1.
+ __ movaps(tmp, dst);
+ __ psrldq(tmp, Immediate(4));
+ __ pminsd(dst, tmp);
+ break;
+ }
+ case HVecReduce::kMax: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ psrldq(tmp, Immediate(8));
+ __ pmaxsd(dst, tmp);
+ // Fold again from the partial result; reusing the stale copy of src would drop lane 1.
+ __ movaps(tmp, dst);
+ __ psrldq(tmp, Immediate(4));
+ __ pmaxsd(dst, tmp);
+ break;
+ }
+ }
+ break;
+ case Primitive::kPrimLong: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
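+ // Copy the upper 64-bit lane onto the lower one and add.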
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ punpckhqdq(tmp, tmp);
+ __ paddq(dst, tmp);
+ break;
+ case HVecReduce::kMin:
+ case HVecReduce::kMax:
+ LOG(FATAL) << "Unsupported reduction type";
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -819,6 +948,91 @@ void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
}
}
+void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimLong:
+ // Long needs extra temporary to load from register pairs.
+ if (!is_zero) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ // Zero out all other elements first.
+ __ xorps(dst, dst);
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ return;
+ }
+
+ // Set required elements.
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: // TODO: setting the narrow packed types is not implemented yet.
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<Register>());
+ break;
+ case Primitive::kPrimLong: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ DCHECK_EQ(2u, instruction->GetVectorLength());
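+ // Build the 64-bit element in lane 0 from the register pair.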
+ __ xorps(tmp, tmp);
+ __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
+ __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
+ __ punpckldq(dst, tmp);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
@@ -866,6 +1080,7 @@ static Address VecAddress(LocationSummary* locations, size_t size, bool is_strin
case 8: scale = TIMES_8; break;
default: break;
}
+ // Incorporate the string or array offset in the address computation.
uint32_t offset = is_string_char_at
? mirror::String::ValueOffset().Uint32Value()
: mirror::Array::DataOffset(size).Uint32Value();
@@ -900,7 +1115,7 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
__ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
__ j(kNotZero, &not_compressed);
// Zero extend 8 compressed bytes into 8 chars.
- __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
+ __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
__ pxor(tmp, tmp);
__ punpcklbw(reg, tmp);
__ jmp(&done);
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index 246044ebb8..7051ba041f 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -15,7 +15,9 @@
*/
#include "code_generator_x86_64.h"
+
#include "mirror/array-inl.h"
+#include "mirror/string.h"
namespace art {
namespace x86_64 {
@@ -25,6 +27,8 @@ namespace x86_64 {
void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
@@ -32,13 +36,16 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
- locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
locations->SetOut(Location::RequiresFpuRegister());
break;
case Primitive::kPrimFloat:
case Primitive::kPrimDouble:
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(is_zero ? Location::RequiresFpuRegister()
+ : Location::SameAsFirstInput());
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -48,42 +55,49 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru
void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = instruction->GetLocations();
- XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ __ xorps(dst, dst);
+ return;
+ }
+
switch (instruction->GetPackedType()) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
- __ punpcklbw(reg, reg);
- __ punpcklwd(reg, reg);
- __ pshufd(reg, reg, Immediate(0));
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
+ __ punpcklbw(dst, dst);
+ __ punpcklwd(dst, dst);
+ __ pshufd(dst, dst, Immediate(0));
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
DCHECK_EQ(8u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
- __ punpcklwd(reg, reg);
- __ pshufd(reg, reg, Immediate(0));
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
+ __ punpcklwd(dst, dst);
+ __ pshufd(dst, dst, Immediate(0));
break;
case Primitive::kPrimInt:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
- __ pshufd(reg, reg, Immediate(0));
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
+ __ pshufd(dst, dst, Immediate(0));
break;
case Primitive::kPrimLong:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
- __ punpcklqdq(reg, reg);
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
+ __ punpcklqdq(dst, dst);
break;
case Primitive::kPrimFloat:
DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ shufps(reg, reg, Immediate(0));
+ __ shufps(dst, dst, Immediate(0));
break;
case Primitive::kPrimDouble:
DCHECK(locations->InAt(0).Equals(locations->Out()));
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ shufpd(reg, reg, Immediate(0));
+ __ shufpd(dst, dst, Immediate(0));
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -91,20 +105,57 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar
}
}
-void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
-void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: // TODO: extraction of the narrow packed types is not implemented yet.
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(locations->Out().AsRegister<CpuRegister>(), src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movd(locations->Out().AsRegister<CpuRegister>(), src); // is 64-bit
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 4u);
+ DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector unary operations.
@@ -128,6 +179,73 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in
}
}
+void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+ // Long reduction or min/max require a temporary.
+ if (instruction->GetPackedType() == Primitive::kPrimLong ||
+ instruction->GetKind() == HVecReduce::kMin ||
+ instruction->GetKind() == HVecReduce::kMax) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
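+ // Two horizontal adds fold all four lanes into lane 0.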
+ __ movaps(dst, src);
+ __ phaddd(dst, dst);
+ __ phaddd(dst, dst);
+ break;
+ case HVecReduce::kMin: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ psrldq(tmp, Immediate(8));
+ __ pminsd(dst, tmp);
+ // Fold again from the partial result; reusing the stale copy of src would drop lane 1.
+ __ movaps(tmp, dst);
+ __ psrldq(tmp, Immediate(4));
+ __ pminsd(dst, tmp);
+ break;
+ }
+ case HVecReduce::kMax: {
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ psrldq(tmp, Immediate(8));
+ __ pmaxsd(dst, tmp);
+ // Fold again from the partial result; reusing the stale copy of src would drop lane 1.
+ __ movaps(tmp, dst);
+ __ psrldq(tmp, Immediate(4));
+ __ pmaxsd(dst, tmp);
+ break;
+ }
+ }
+ break;
+ case Primitive::kPrimLong: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
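+ // Copy the upper 64-bit lane onto the lower one and add.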
+ __ movaps(tmp, src);
+ __ movaps(dst, src);
+ __ punpckhqdq(tmp, tmp);
+ __ paddq(dst, tmp);
+ break;
+ case HVecReduce::kMin:
+ case HVecReduce::kMax:
+ LOG(FATAL) << "Unsupported reduction type";
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
}
@@ -812,6 +930,81 @@ void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
}
}
+void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
+
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ // Zero out all other elements first.
+ __ xorps(dst, dst);
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ return;
+ }
+
+ // Set required elements.
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: // TODO: setting the narrow packed types is not implemented yet.
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>());
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
LOG(FATAL) << "No SIMD for " << instr->GetId();
}
@@ -859,6 +1052,7 @@ static Address VecAddress(LocationSummary* locations, size_t size, bool is_strin
case 8: scale = TIMES_8; break;
default: break;
}
+ // Incorporate the string or array offset in the address computation.
uint32_t offset = is_string_char_at
? mirror::String::ValueOffset().Uint32Value()
: mirror::Array::DataOffset(size).Uint32Value();
@@ -893,7 +1087,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
__ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1));
__ j(kNotZero, &not_compressed);
// Zero extend 8 compressed bytes into 8 chars.
- __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
+ __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
__ pxor(tmp, tmp);
__ punpcklbw(reg, tmp);
__ jmp(&done);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 99b7793c81..512968f01d 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1035,6 +1035,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
constant_area_start_(-1),
@@ -4652,7 +4653,6 @@ Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
}
void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
- DCHECK(GetCompilerOptions().IsBootImage());
HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
string_patches_.emplace_back(address,
load_string->GetDexFile(),
@@ -4664,9 +4664,9 @@ Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
DCHECK(!GetCompilerOptions().IsBootImage());
HX86ComputeBaseMethodAddress* address =
load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
- string_patches_.emplace_back(
+ string_bss_entry_patches_.emplace_back(
address, load_string->GetDexFile(), load_string->GetStringIndex().index_);
- return &string_patches_.back().label;
+ return &string_bss_entry_patches_.back().label;
}
// The label points to the end of the "movl" or another instruction but the literal offset
@@ -4691,7 +4691,8 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
method_bss_entry_patches_.size() +
boot_image_type_patches_.size() +
type_bss_entry_patches_.size() +
- string_patches_.size();
+ string_patches_.size() +
+ string_bss_entry_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_,
@@ -4702,12 +4703,15 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
} else {
DCHECK(boot_image_method_patches_.empty());
DCHECK(boot_image_type_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(string_patches_,
+ linker_patches);
}
EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
linker_patches);
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
+ linker_patches);
DCHECK_EQ(size, linker_patches->size());
}
@@ -6219,6 +6223,7 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBootImageInternTable:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -6237,6 +6242,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
HLoadString::LoadKind load_kind = load->GetLoadKind();
if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
+ load_kind == HLoadString::LoadKind::kBootImageInternTable ||
load_kind == HLoadString::LoadKind::kBssEntry) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -6282,14 +6288,21 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
Register method_address = locations->InAt(0).AsRegister<Register>();
__ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
codegen_->RecordBootStringPatch(load);
- return; // No dex cache slow path.
+ return;
}
case HLoadString::LoadKind::kBootImageAddress: {
uint32_t address = dchecked_integral_cast<uint32_t>(
reinterpret_cast<uintptr_t>(load->GetString().Get()));
DCHECK_NE(address, 0u);
__ movl(out, Immediate(address));
- return; // No dex cache slow path.
+ return;
+ }
+ case HLoadString::LoadKind::kBootImageInternTable: {
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ Register method_address = locations->InAt(0).AsRegister<Register>();
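+ // Load the String reference from the boot image intern table; the linker patches
+ // kDummy32BitOffset so the address resolves to the table entry.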
+ __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
+ codegen_->RecordBootStringPatch(load);
+ return;
}
case HLoadString::LoadKind::kBssEntry: {
Register method_address = locations->InAt(0).AsRegister<Register>();
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f48753b614..b32d57a774 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -640,8 +640,10 @@ class CodeGeneratorX86 : public CodeGenerator {
ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
// Type patch locations for kBssEntry.
ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;
- // String patch locations; type depends on configuration (app .bss or boot image).
+ // String patch locations; type depends on configuration (intern table or boot image PIC).
ArenaDeque<X86PcRelativePatchInfo> string_patches_;
+ // String patch locations for kBssEntry.
+ ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_;
// Patches for string root accesses in JIT compiled code.
ArenaDeque<PatchInfo<Label>> jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 8283887a96..0c3b2ad742 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1089,15 +1089,15 @@ Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
}
void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
- DCHECK(GetCompilerOptions().IsBootImage());
string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
__ Bind(&string_patches_.back().label);
}
Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
DCHECK(!GetCompilerOptions().IsBootImage());
- string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
- return &string_patches_.back().label;
+ string_bss_entry_patches_.emplace_back(
+ load_string->GetDexFile(), load_string->GetStringIndex().index_);
+ return &string_bss_entry_patches_.back().label;
}
// The label points to the end of the "movl" or another instruction but the literal offset
@@ -1122,7 +1122,8 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat
method_bss_entry_patches_.size() +
boot_image_type_patches_.size() +
type_bss_entry_patches_.size() +
- string_patches_.size();
+ string_patches_.size() +
+ string_bss_entry_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_,
@@ -1133,12 +1134,15 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat
} else {
DCHECK(boot_image_method_patches_.empty());
DCHECK(boot_image_type_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(string_patches_,
+ linker_patches);
}
EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
linker_patches);
EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
linker_patches);
+ EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
+ linker_patches);
DCHECK_EQ(size, linker_patches->size());
}
@@ -1230,6 +1234,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ string_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
@@ -5621,6 +5626,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBootImageInternTable:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
@@ -5678,14 +5684,20 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
__ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
codegen_->RecordBootStringPatch(load);
- return; // No dex cache slow path.
+ return;
}
case HLoadString::LoadKind::kBootImageAddress: {
uint32_t address = dchecked_integral_cast<uint32_t>(
reinterpret_cast<uintptr_t>(load->GetString().Get()));
DCHECK_NE(address, 0u);
__ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
- return; // No dex cache slow path.
+ return;
+ }
+ case HLoadString::LoadKind::kBootImageInternTable: {
+ DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
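+ // Load the String reference from the boot image intern table; the linker patches
+ // kDummy32BitOffset so the RIP-relative address resolves to the table entry.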
+ __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
+ codegen_->RecordBootStringPatch(load);
+ return;
}
case HLoadString::LoadKind::kBssEntry: {
Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 33c64290d4..f5fa86bf23 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -611,8 +611,10 @@ class CodeGeneratorX86_64 : public CodeGenerator {
ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
// Type patch locations for kBssEntry.
ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
- // String patch locations; type depends on configuration (app .bss or boot image).
+ // String patch locations; type depends on configuration (intern table or boot image PIC).
ArenaDeque<PatchInfo<Label>> string_patches_;
+ // String patch locations for kBssEntry.
+ ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_;
// Patches for string literals in JIT compiled code.
ArenaDeque<PatchInfo<Label>> jit_string_patches_;
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index cada2e679b..aa4f5da3f0 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -79,6 +79,21 @@ class CodegenTargetConfig {
};
#ifdef ART_ENABLE_CODEGEN_arm
+// Special ARM code generator for codegen testing in a limited code
+// generation environment (i.e. with no runtime support).
+//
+// Note: If we want to exercise certain HIR constructs
+// (e.g. reference field load in Baker read barrier configuration) in
+// codegen tests in the future, we should also:
+// - save the Thread Register (R9) and possibly the Marking Register
+// (R8) before entering the generated function (both registers are
+// callee-save in AAPCS);
+// - set these registers to meaningful values before or upon entering
+// the generated function (so that generated code using them is
+// correct);
+// - restore their original values before leaving the generated
+// function.
+
// Provide our own codegen, that ensures the C calling conventions
// are preserved. Currently, ART and C do not match as R4 is caller-save
// in ART, and callee-save in C. Alternatively, we could use or write
@@ -100,6 +115,50 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL {
blocked_core_registers_[arm::R6] = false;
blocked_core_registers_[arm::R7] = false;
}
+
+ void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED,
+ Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE {
+ // When turned on, the marking register checks in
+ // CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expect the
+ // Thread Register and the Marking Register to be set to
+ // meaningful values. This is not the case in codegen testing, so
+ // just disable them entirely here (by doing nothing in this
+ // method).
+ }
+};
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+// Special ARM64 code generator for codegen testing in a limited code
+// generation environment (i.e. with no runtime support).
+//
+// Note: If we want to exercise certain HIR constructs
+// (e.g. reference field load in Baker read barrier configuration) in
+// codegen tests in the future, we should also:
+// - save the Thread Register (X19) and possibly the Marking Register
+// (X20) before entering the generated function (both registers are
+// callee-save in AAPCS64);
+// - set these registers to meaningful values before or upon entering
+// the generated function (so that generated code using them is
+// correct);
+// - restore their original values before leaving the generated
+// function.
+class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 {
+ public:
+ TestCodeGeneratorARM64(HGraph* graph,
+ const Arm64InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
+ : arm64::CodeGeneratorARM64(graph, isa_features, compiler_options) {}
+
+ void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED,
+ Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE {
+ // When turned on, the marking register checks in
+ // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck expect the
+ // Thread Register and the Marking Register to be set to
+ // meaningful values. This is not the case in codegen testing, so
+ // just disable them entirely here (by doing nothing in this
+ // method).
+ }
};
#endif
@@ -263,7 +322,8 @@ static void RunCode(CodegenTargetConfig target_config,
bool has_result,
Expected expected) {
CompilerOptions compiler_options;
- std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options));
+ std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph,
+ compiler_options));
RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected);
}
@@ -280,9 +340,8 @@ CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& c
CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) {
std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
Arm64InstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena()) arm64::CodeGeneratorARM64(graph,
- *features_arm64.get(),
- compiler_options);
+ return new (graph->GetArena())
+ TestCodeGeneratorARM64(graph, *features_arm64.get(), compiler_options);
}
#endif
diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc
index 0d4e1c5c97..fa3c4dfba8 100644
--- a/compiler/optimizing/emit_swap_mips_test.cc
+++ b/compiler/optimizing/emit_swap_mips_test.cc
@@ -91,7 +91,9 @@ class EmitSwapMipsTest : public ::testing::Test {
return nullptr;
}
- void DriverWrapper(HParallelMove* move, std::string assembly_text, std::string test_name) {
+ void DriverWrapper(HParallelMove* move,
+ const std::string& assembly_text,
+ const std::string& test_name) {
codegen_->GetMoveResolver()->EmitNativeCode(move);
assembler_ = codegen_->GetAssembler();
assembler_->FinalizeCode();
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index a20ec3c0db..3035e4657d 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -501,6 +501,20 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("field_type") << iset->GetFieldType();
}
+ void VisitStaticFieldGet(HStaticFieldGet* sget) OVERRIDE {
+ StartAttributeStream("field_name") <<
+ sget->GetFieldInfo().GetDexFile().PrettyField(sget->GetFieldInfo().GetFieldIndex(),
+ /* with type */ false);
+ StartAttributeStream("field_type") << sget->GetFieldType();
+ }
+
+ void VisitStaticFieldSet(HStaticFieldSet* sset) OVERRIDE {
+ StartAttributeStream("field_name") <<
+ sset->GetFieldInfo().GetDexFile().PrettyField(sset->GetFieldInfo().GetFieldIndex(),
+ /* with type */ false);
+ StartAttributeStream("field_type") << sset->GetFieldType();
+ }
+
void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE {
StartAttributeStream("field_type") << field_access->GetFieldType();
}
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index f35aace3a9..089340e715 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -87,8 +87,10 @@ static bool IsGEZero(HInstruction* instruction) {
IsGEZero(instruction->InputAt(1));
case Intrinsics::kMathAbsInt:
case Intrinsics::kMathAbsLong:
- // Instruction ABS(x) is >= 0.
- return true;
+ // Instruction ABS(>=0) is >= 0.
+ // NOTE: ABS(minint) = minint prevents assuming
+ // >= 0 without looking at the argument.
+ return IsGEZero(instruction->InputAt(0));
default:
break;
}
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index ab1772bf15..0b980f596a 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -151,6 +151,16 @@ class InductionVarRange {
}
/**
+ * Checks if the given phi instruction has been classified as anything by
+ * induction variable analysis. Returns false for anything that cannot be
+ * classified statically, such as reductions or other complex cycles.
+ */
+ bool IsClassified(HPhi* phi) const {
+ HLoopInformation* lp = phi->GetBlock()->GetLoopInformation(); // closest enveloping loop
+ return (lp != nullptr) && (induction_analysis_->LookupInfo(lp, phi) != nullptr);
+ }
+
+ /**
* Checks if header logic of a loop terminates. Sets trip-count tc if known.
*/
bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 5c79511bab..f2a829fa56 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -59,6 +59,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
bool TryDeMorganNegationFactoring(HBinaryOperation* op);
bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction);
bool TrySubtractionChainSimplification(HBinaryOperation* instruction);
+ bool TryCombineVecMultiplyAccumulate(HVecMul* mul);
void VisitShift(HBinaryOperation* shift);
@@ -98,6 +99,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
void VisitInvoke(HInvoke* invoke) OVERRIDE;
void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
+ void VisitVecMul(HVecMul* instruction) OVERRIDE;
bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
@@ -243,6 +245,84 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation
return false;
}
+bool InstructionSimplifierVisitor::TryCombineVecMultiplyAccumulate(HVecMul* mul) {
+ Primitive::Type type = mul->GetPackedType();
+ InstructionSet isa = codegen_->GetInstructionSet();
+ switch (isa) {
+ case kArm64:
+ if (!(type == Primitive::kPrimByte ||
+ type == Primitive::kPrimChar ||
+ type == Primitive::kPrimShort ||
+ type == Primitive::kPrimInt)) {
+ return false;
+ }
+ break;
+ case kMips:
+ case kMips64:
+ if (!(type == Primitive::kPrimByte ||
+ type == Primitive::kPrimChar ||
+ type == Primitive::kPrimShort ||
+ type == Primitive::kPrimInt ||
+ type == Primitive::kPrimLong)) {
+ return false;
+ }
+ break;
+ default:
+ return false;
+ }
+
+ ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
+
+ if (mul->HasOnlyOneNonEnvironmentUse()) {
+ HInstruction* use = mul->GetUses().front().GetUser();
+ if (use->IsVecAdd() || use->IsVecSub()) {
+ // Replace code looking like
+ // VECMUL tmp, x, y
+ // VECADD/SUB dst, acc, tmp
+ // with
+ // VECMULACC dst, acc, x, y
+ // Note that we do not want to (unconditionally) perform the merge when the
+ // multiplication has multiple uses and it can be merged in all of them.
+ // Multiple uses could happen on the same control-flow path, and we would
+ // then increase the amount of work. In the future we could try to evaluate
+ // whether all uses are on different control-flow paths (using dominance and
+ // reverse-dominance information) and only perform the merge when they are.
+ HInstruction* accumulator = nullptr;
+ HVecBinaryOperation* binop = use->AsVecBinaryOperation();
+ HInstruction* binop_left = binop->GetLeft();
+ HInstruction* binop_right = binop->GetRight();
+ // This is always true since the `HVecMul` has only one use (which is checked above).
+ DCHECK_NE(binop_left, binop_right);
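+ // The multiply may be either operand of an add, but only the right operand of a
+ // sub (acc - x * y); x * y - acc has no multiply-subtract form and is not merged.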
+ if (binop_right == mul) {
+ accumulator = binop_left;
+ } else if (use->IsVecAdd()) {
+ DCHECK_EQ(binop_left, mul);
+ accumulator = binop_right;
+ }
+
+ HInstruction::InstructionKind kind =
+ use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
+ if (accumulator != nullptr) {
+ HVecMultiplyAccumulate* mulacc =
+ new (arena) HVecMultiplyAccumulate(arena,
+ kind,
+ accumulator,
+ mul->GetLeft(),
+ mul->GetRight(),
+ binop->GetPackedType(),
+ binop->GetVectorLength());
+
+ binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+ DCHECK(!mul->HasUses());
+ mul->GetBlock()->RemoveInstruction(mul);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
HInstruction* shift_amount = instruction->GetRight();
@@ -2301,4 +2381,10 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification(
return true;
}
+void InstructionSimplifierVisitor::VisitVecMul(HVecMul* instruction) {
+ if (TryCombineVecMultiplyAccumulate(instruction)) {
+ RecordSimplification();
+ }
+}
+
} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 311be1fb49..7c9bfb11b2 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -210,12 +210,6 @@ void InstructionSimplifierArm64Visitor::VisitXor(HXor* instruction) {
}
}
-void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) {
- if (TryCombineVecMultiplyAccumulate(instruction, kArm64)) {
- RecordSimplification();
- }
-}
-
void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) {
if (!instruction->IsStringCharAt()
&& TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 8596f6ad40..4f16fc383d 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -74,7 +74,6 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor {
void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
void VisitUShr(HUShr* instruction) OVERRIDE;
void VisitXor(HXor* instruction) OVERRIDE;
- void VisitVecMul(HVecMul* instruction) OVERRIDE;
void VisitVecLoad(HVecLoad* instruction) OVERRIDE;
void VisitVecStore(HVecStore* instruction) OVERRIDE;
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index d1bc4dadeb..7a759b9118 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -281,73 +281,6 @@ bool TryExtractArrayAccessAddress(HInstruction* access,
return true;
}
-bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) {
- Primitive::Type type = mul->GetPackedType();
- switch (isa) {
- case kArm64:
- if (!(type == Primitive::kPrimByte ||
- type == Primitive::kPrimChar ||
- type == Primitive::kPrimShort ||
- type == Primitive::kPrimInt)) {
- return false;
- }
- break;
- default:
- return false;
- }
-
- ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
-
- if (mul->HasOnlyOneNonEnvironmentUse()) {
- HInstruction* use = mul->GetUses().front().GetUser();
- if (use->IsVecAdd() || use->IsVecSub()) {
- // Replace code looking like
- // VECMUL tmp, x, y
- // VECADD/SUB dst, acc, tmp
- // with
- // VECMULACC dst, acc, x, y
- // Note that we do not want to (unconditionally) perform the merge when the
- // multiplication has multiple uses and it can be merged in all of them.
- // Multiple uses could happen on the same control-flow path, and we would
- // then increase the amount of work. In the future we could try to evaluate
- // whether all uses are on different control-flow paths (using dominance and
- // reverse-dominance information) and only perform the merge when they are.
- HInstruction* accumulator = nullptr;
- HVecBinaryOperation* binop = use->AsVecBinaryOperation();
- HInstruction* binop_left = binop->GetLeft();
- HInstruction* binop_right = binop->GetRight();
- // This is always true since the `HVecMul` has only one use (which is checked above).
- DCHECK_NE(binop_left, binop_right);
- if (binop_right == mul) {
- accumulator = binop_left;
- } else if (use->IsVecAdd()) {
- DCHECK_EQ(binop_left, mul);
- accumulator = binop_right;
- }
-
- HInstruction::InstructionKind kind =
- use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
- if (accumulator != nullptr) {
- HVecMultiplyAccumulate* mulacc =
- new (arena) HVecMultiplyAccumulate(arena,
- kind,
- accumulator,
- mul->GetLeft(),
- mul->GetRight(),
- binop->GetPackedType(),
- binop->GetVectorLength());
-
- binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
- DCHECK(!mul->HasUses());
- mul->GetBlock()->RemoveInstruction(mul);
- return true;
- }
- }
- }
-
- return false;
-}
-
bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) {
if (index->IsConstant()) {
// If index is constant the whole address calculation often can be done by LDR/STR themselves.
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 371619fa2e..31e23833b1 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -58,7 +58,6 @@ bool TryExtractArrayAccessAddress(HInstruction* access,
HInstruction* index,
size_t data_offset);
-bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa);
bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index);
} // namespace art
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 5691dd0d4a..1ed1b7537e 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -470,6 +470,68 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) {
GenBitCount(invoke, Primitive::kPrimInt, GetVIXLAssembler());
}
+static void GenHighestOneBit(HInvoke* invoke, Primitive::Type type, MacroAssembler* masm) {
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+ UseScratchRegisterScope temps(masm);
+
+ Register src = InputRegisterAt(invoke, 0);
+ Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
+ Register temp = (type == Primitive::kPrimLong) ? temps.AcquireX() : temps.AcquireW();
+ size_t high_bit = (type == Primitive::kPrimLong) ? 63u : 31u;
+ size_t clz_high_bit = (type == Primitive::kPrimLong) ? 6u : 5u;
+
+ __ Clz(temp, src);
+ __ Mov(dst, UINT64_C(1) << high_bit); // MOV (bitmask immediate)
+ __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit)); // Clear dst if src was 0.
+ __ Lsr(dst, dst, temp);
+}
+
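For reference, the CLZ/MOV/BIC/LSR sequence above is a branch-free highestOneBit: CLZ returns the operand width for a zero input, and the BIC clears the top bit exactly in that case, so no explicit zero check is needed. A minimal C++ sketch of the 32-bit case (the function name and the __builtin_clz zero guard are illustrative, not part of the patch):

// Branch-free highestOneBit mirroring the CLZ/MOV/BIC/LSR sequence above.
#include <cstdint>

static uint32_t HighestOneBit32(uint32_t src) {
  // ARM CLZ yields 32 for zero; __builtin_clz(0) is undefined, hence the guard.
  uint32_t clz = (src == 0u) ? 32u : static_cast<uint32_t>(__builtin_clz(src));
  uint32_t dst = UINT32_C(1) << 31;   // MOV (bitmask immediate)
  dst &= ~(clz << (31 - 5));          // BIC: clears bit 31 iff clz == 32, i.e. src was 0
  return dst >> (clz & 31u);          // LSR (AArch64 register shifts are taken mod the width)
}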
+void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+ GenHighestOneBit(invoke, Primitive::kPrimInt, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) {
+ GenHighestOneBit(invoke, Primitive::kPrimLong, GetVIXLAssembler());
+}
+
+static void GenLowestOneBit(HInvoke* invoke, Primitive::Type type, MacroAssembler* masm) {
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+
+ UseScratchRegisterScope temps(masm);
+
+ Register src = InputRegisterAt(invoke, 0);
+ Register dst = RegisterFrom(invoke->GetLocations()->Out(), type);
+ Register temp = (type == Primitive::kPrimLong) ? temps.AcquireX() : temps.AcquireW();
+
+ __ Neg(temp, src);
+ __ And(dst, temp, src);
+}
+
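The NEG/AND pair above is the classic x & -x identity: two's-complement negation flips every bit above the lowest set bit, so the AND isolates that bit (and maps 0 to 0). A one-line sketch, not part of the patch:

#include <cstdint>

// lowestOneBit as NEG+AND, i.e. x & -x.
static uint64_t LowestOneBit64(uint64_t src) {
  return src & (~src + 1u);  // two's-complement negate, then AND
}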
+void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ GenLowestOneBit(invoke, Primitive::kPrimInt, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) {
+ GenLowestOneBit(invoke, Primitive::kPrimLong, GetVIXLAssembler());
+}
+
static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
@@ -504,14 +566,6 @@ void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) {
MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
}
-static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
- LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
static void GenAbsInteger(LocationSummary* locations,
bool is64bit,
MacroAssembler* masm) {
@@ -526,7 +580,7 @@ static void GenAbsInteger(LocationSummary* locations,
}
void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) {
- CreateIntToInt(arena_, invoke);
+ CreateIntToIntLocations(arena_, invoke);
}
void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
@@ -534,7 +588,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) {
- CreateIntToInt(arena_, invoke);
+ CreateIntToIntLocations(arena_, invoke);
}
void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
@@ -1579,12 +1633,13 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
}
// Assertions that must hold in order to compare strings 8 bytes at a time.
+ // Ok to do this because strings are zero-padded to kObjectAlignment.
DCHECK_ALIGNED(value_offset, 8);
static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
if (const_string != nullptr &&
- const_string_length < (is_compressed ? kShortConstStringEqualsCutoffInBytes
- : kShortConstStringEqualsCutoffInBytes / 2u)) {
+ const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
+ : kShortConstStringEqualsCutoffInBytes / 2u)) {
// Load and compare the contents. Though we know the contents of the short const string
// at compile time, materializing constants may be more code than loading from memory.
int32_t offset = value_offset;
@@ -1592,7 +1647,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u);
temp = temp.X();
temp1 = temp1.X();
- while (remaining_bytes > 8u) {
+ while (remaining_bytes > sizeof(uint64_t)) {
Register temp2 = XRegisterFrom(locations->GetTemp(0));
__ Ldp(temp, temp1, MemOperand(str.X(), offset));
__ Ldp(temp2, out, MemOperand(arg.X(), offset));
@@ -1628,7 +1683,6 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
temp1 = temp1.X();
Register temp2 = XRegisterFrom(locations->GetTemp(0));
// Loop to compare strings 8 bytes at a time starting at the front of the string.
- // Ok to do this because strings are zero-padded to kObjectAlignment.
__ Bind(&loop);
__ Ldr(out, MemOperand(str.X(), temp1));
__ Ldr(temp2, MemOperand(arg.X(), temp1));
@@ -2993,10 +3047,6 @@ void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
}
UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(ARM64, LongLowestOneBit)
UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 8b4044d69b..d2dc88a73b 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -331,6 +331,14 @@ static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}
+static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
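This helper factors out the LocationSummary boilerplate that the hunks below previously spelled out inline for LongNumberOfLeadingZeros, LongNumberOfTrailingZeros, LongReverse, and LongReverseBytes, and it is reused by the new Long{Highest,Lowest}OneBit intrinsics further down; the kOutputOverlap setting is preserved unchanged from the inlined versions.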
static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
@@ -375,11 +383,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* in
}
void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ CreateLongToLongLocationsWithOverlap(arena_, invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -417,11 +421,7 @@ static void GenNumberOfTrailingZeros(HInvoke* invoke,
}
void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ CreateIntToIntLocations(arena_, invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
@@ -429,11 +429,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* i
}
void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ CreateLongToLongLocationsWithOverlap(arena_, invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -1713,6 +1709,22 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
}
}
+// The cutoff for unrolling the loop in the String.equals() intrinsic for const strings.
+// The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
+// and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
+// instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
+// Allow up to 12 instructions (32 bytes) for the unrolled loop.
+constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;
+
+static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
+ if (candidate->IsLoadString()) {
+ HLoadString* load_string = candidate->AsLoadString();
+ const DexFile& dex_file = load_string->GetDexFile();
+ return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
+ }
+ return nullptr;
+}
+
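The 16-byte cutoff is consistent with the instruction budget stated above: the longest unrolled sequence is two 8-byte groups of LDRD+LDRD+CMP+BNE+CMP+BNE, i.e. 12 instructions. Since an uncompressed char occupies two bytes, the code below compares const_string_length against kShortConstStringEqualsCutoffInBytes / 2 in that case.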
void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kNoCall,
@@ -1720,12 +1732,29 @@ void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
InvokeRuntimeCallingConventionARMVIXL calling_convention;
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
+
// Temporary registers to store lengths of strings and for calculations.
// Using instruction cbz requires a low register, so explicitly set a temp to be R0.
locations->AddTemp(LocationFrom(r0));
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
+ // For the generic implementation and for long const strings we need an extra temporary.
+ // We do not need it for short const strings (up to 4 bytes); see the code generation below.
+ uint32_t const_string_length = 0u;
+ const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
+ if (const_string == nullptr) {
+ const_string = GetConstString(invoke->InputAt(1), &const_string_length);
+ }
+ bool is_compressed =
+ mirror::kUseStringCompression &&
+ const_string != nullptr &&
+ mirror::String::DexFileStringAllASCII(const_string, const_string_length);
+ if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+
+ // TODO: If the String.equals() is used only for an immediately following HIf, we can
+ // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
+ // Then we shall need an extra temporary register instead of the output register.
locations->SetOut(Location::RequiresRegister());
}
@@ -1738,8 +1767,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
vixl32::Register out = OutputRegister(invoke);
vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
- vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
- vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
vixl32::Label loop;
vixl32::Label end;
@@ -1771,52 +1798,109 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
// Receiver must be a string object, so its class field is equal to all strings' class fields.
// If the argument is a string object, its class field must be equal to receiver's class field.
__ Ldr(temp, MemOperand(str, class_offset));
- __ Ldr(temp1, MemOperand(arg, class_offset));
- __ Cmp(temp, temp1);
+ __ Ldr(out, MemOperand(arg, class_offset));
+ __ Cmp(temp, out);
__ B(ne, &return_false, /* far_target */ false);
}
- // Load `count` fields of this and argument strings.
- __ Ldr(temp, MemOperand(str, count_offset));
- __ Ldr(temp1, MemOperand(arg, count_offset));
- // Check if `count` fields are equal, return false if they're not.
- // Also compares the compression style, if differs return false.
- __ Cmp(temp, temp1);
- __ B(ne, &return_false, /* far_target */ false);
- // Return true if both strings are empty. Even with string compression `count == 0` means empty.
- static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
- "Expecting 0=compressed, 1=uncompressed");
- __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
+ // Check if one of the inputs is a const string. Do not special-case both strings
+ // being const; such cases should be handled by constant folding if needed.
+ uint32_t const_string_length = 0u;
+ const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
+ if (const_string == nullptr) {
+ const_string = GetConstString(invoke->InputAt(1), &const_string_length);
+ if (const_string != nullptr) {
+ std::swap(str, arg); // Make sure the const string is in `str`.
+ }
+ }
+ bool is_compressed =
+ mirror::kUseStringCompression &&
+ const_string != nullptr &&
+ mirror::String::DexFileStringAllASCII(const_string, const_string_length);
+
+ if (const_string != nullptr) {
+ // Load `count` field of the argument string and check if it matches the const string.
+ // Also compares the compression style, if differs return false.
+ __ Ldr(temp, MemOperand(arg, count_offset));
+ __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
+ __ B(ne, &return_false, /* far_target */ false);
+ } else {
+ // Load `count` fields of this and argument strings.
+ __ Ldr(temp, MemOperand(str, count_offset));
+ __ Ldr(out, MemOperand(arg, count_offset));
+ // Check if `count` fields are equal, return false if they're not.
+ // Also compares the compression style, if differs return false.
+ __ Cmp(temp, out);
+ __ B(ne, &return_false, /* far_target */ false);
+ }
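A single CMP against an immediate suffices here because the count field packs the character count and the compression flag into one word, with the flag in the least significant bit and 0 meaning compressed (see the static_assert further below); GetFlaggedCount() produces the matching packed constant for the const string.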
// Assertions that must hold in order to compare strings 4 bytes at a time.
+ // Ok to do this because strings are zero-padded to kObjectAlignment.
DCHECK_ALIGNED(value_offset, 4);
static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
- if (mirror::kUseStringCompression) {
- // For string compression, calculate the number of bytes to compare (not chars).
- // This could in theory exceed INT32_MAX, so treat temp as unsigned.
- __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
- ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
- 2 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ it(cs); // If uncompressed,
- __ add(cs, temp, temp, temp); // double the byte count.
- }
+ if (const_string != nullptr &&
+ const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
+ : kShortConstStringEqualsCutoffInBytes / 2u)) {
+ // Load and compare the contents. Though we know the contents of the short const string
+ // at compile time, materializing constants may be more code than loading from memory.
+ int32_t offset = value_offset;
+ size_t remaining_bytes =
+ RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
+ while (remaining_bytes > sizeof(uint32_t)) {
+ vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
+ UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
+ vixl32::Register temp2 = scratch_scope.Acquire();
+ __ Ldrd(temp, temp1, MemOperand(str, offset));
+ __ Ldrd(temp2, out, MemOperand(arg, offset));
+ __ Cmp(temp, temp2);
+ __ B(ne, &return_false, /* far_target */ false);
+ __ Cmp(temp1, out);
+ __ B(ne, &return_false, /* far_target */ false);
+ offset += 2u * sizeof(uint32_t);
+ remaining_bytes -= 2u * sizeof(uint32_t);
+ }
+ if (remaining_bytes != 0u) {
+ __ Ldr(temp, MemOperand(str, offset));
+ __ Ldr(out, MemOperand(arg, offset));
+ __ Cmp(temp, out);
+ __ B(ne, &return_false, /* far_target */ false);
+ }
+ } else {
+ // Return true if both strings are empty. Even with string compression `count == 0` means empty.
+ static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+ "Expecting 0=compressed, 1=uncompressed");
+ __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
- // Store offset of string value in preparation for comparison loop.
- __ Mov(temp1, value_offset);
+ if (mirror::kUseStringCompression) {
+ // For string compression, calculate the number of bytes to compare (not chars).
+ // This could in theory exceed INT32_MAX, so treat temp as unsigned.
+ __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
+ ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cs); // If uncompressed,
+ __ add(cs, temp, temp, temp); // double the byte count.
+ }
- // Loop to compare strings 4 bytes at a time starting at the front of the string.
- // Ok to do this because strings are zero-padded to kObjectAlignment.
- __ Bind(&loop);
- __ Ldr(out, MemOperand(str, temp1));
- __ Ldr(temp2, MemOperand(arg, temp1));
- __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
- __ Cmp(out, temp2);
- __ B(ne, &return_false, /* far_target */ false);
- // With string compression, we have compared 4 bytes, otherwise 2 chars.
- __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
- __ B(hi, &loop, /* far_target */ false);
+ vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
+ UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
+ vixl32::Register temp2 = scratch_scope.Acquire();
+
+ // Store offset of string value in preparation for comparison loop.
+ __ Mov(temp1, value_offset);
+
+ // Loop to compare strings 4 bytes at a time starting at the front of the string.
+ __ Bind(&loop);
+ __ Ldr(out, MemOperand(str, temp1));
+ __ Ldr(temp2, MemOperand(arg, temp1));
+ __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
+ __ Cmp(out, temp2);
+ __ B(ne, &return_false, /* far_target */ false);
+ // With string compression, we have compared 4 bytes, otherwise 2 chars.
+ __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
+ __ B(hi, &loop, /* far_target */ false);
+ }
// Return true and exit the function.
// If loop does not result in returning false, we return true.
@@ -2723,11 +2807,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ CreateLongToLongLocationsWithOverlap(arena_, invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
@@ -2753,11 +2833,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
- LocationSummary* locations = new (arena_) LocationSummary(invoke,
- LocationSummary::kNoCall,
- kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ CreateLongToLongLocationsWithOverlap(arena_, invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
@@ -2827,6 +2903,137 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
}
+static void GenHighestOneBit(HInvoke* invoke,
+ Primitive::Type type,
+ CodeGeneratorARMVIXL* codegen) {
+ DCHECK(Primitive::IsIntOrLongType(type));
+
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ const vixl32::Register temp = temps.Acquire();
+
+ if (type == Primitive::kPrimLong) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location in = locations->InAt(0);
+ Location out = locations->Out();
+
+ vixl32::Register in_reg_lo = LowRegisterFrom(in);
+ vixl32::Register in_reg_hi = HighRegisterFrom(in);
+ vixl32::Register out_reg_lo = LowRegisterFrom(out);
+ vixl32::Register out_reg_hi = HighRegisterFrom(out);
+
+ __ Mov(temp, 0x80000000); // Modified immediate.
+ __ Clz(out_reg_lo, in_reg_lo);
+ __ Clz(out_reg_hi, in_reg_hi);
+ __ Lsr(out_reg_lo, temp, out_reg_lo);
+ __ Lsrs(out_reg_hi, temp, out_reg_hi);
+
+ // Discard result for lowest 32 bits if highest 32 bits are not zero.
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used. If output is in a high register, then we generate
+ // 4 more bytes of code to avoid a branch.
+ Operand mov_src(0);
+ if (!out_reg_lo.IsLow()) {
+ __ Mov(LeaveFlags, temp, 0);
+ mov_src = Operand(temp);
+ }
+ ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+ __ it(ne);
+ __ mov(ne, out_reg_lo, mov_src);
+ } else {
+ vixl32::Register out = OutputRegister(invoke);
+ vixl32::Register in = InputRegisterAt(invoke, 0);
+
+ __ Mov(temp, 0x80000000); // Modified immediate.
+ __ Clz(out, in);
+ __ Lsr(out, temp, out);
+ }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
+ GenHighestOneBit(invoke, Primitive::kPrimInt, codegen_);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
+ CreateLongToLongLocationsWithOverlap(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
+ GenHighestOneBit(invoke, Primitive::kPrimLong, codegen_);
+}
+
+static void GenLowestOneBit(HInvoke* invoke,
+ Primitive::Type type,
+ CodeGeneratorARMVIXL* codegen) {
+ DCHECK(Primitive::IsIntOrLongType(type));
+
+ ArmVIXLAssembler* assembler = codegen->GetAssembler();
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ const vixl32::Register temp = temps.Acquire();
+
+ if (type == Primitive::kPrimLong) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location in = locations->InAt(0);
+ Location out = locations->Out();
+
+ vixl32::Register in_reg_lo = LowRegisterFrom(in);
+ vixl32::Register in_reg_hi = HighRegisterFrom(in);
+ vixl32::Register out_reg_lo = LowRegisterFrom(out);
+ vixl32::Register out_reg_hi = HighRegisterFrom(out);
+
+ __ Rsb(out_reg_hi, in_reg_hi, 0);
+ __ Rsb(out_reg_lo, in_reg_lo, 0);
+ __ And(out_reg_hi, out_reg_hi, in_reg_hi);
+ // The result of this operation is 0 iff in_reg_lo is 0.
+ __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
+
+ // Discard result for highest 32 bits if lowest 32 bits are not zero.
+ // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+ // we check that the output is in a low register, so that a 16-bit MOV
+ // encoding can be used. If output is in a high register, then we generate
+ // 4 more bytes of code to avoid a branch.
+ Operand mov_src(0);
+ if (!out_reg_lo.IsLow()) {
+ __ Mov(LeaveFlags, temp, 0);
+ mov_src = Operand(temp);
+ }
+ ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
+ 2 * vixl32::k16BitT32InstructionSizeInBytes,
+ CodeBufferCheckScope::kExactSize);
+ __ it(ne);
+ __ mov(ne, out_reg_hi, mov_src);
+ } else {
+ vixl32::Register out = OutputRegister(invoke);
+ vixl32::Register in = InputRegisterAt(invoke, 0);
+
+ __ Rsb(temp, in, 0);
+ __ And(out, temp, in);
+ }
+}
+
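The 64-bit path above applies x & -x to each 32-bit half, then discards the high half whenever the low half already yielded the lowest set bit; the ANDS on the low half sets the flags consumed by the IT(ne)/MOV pair. A scalar sketch of the same logic (illustrative only, not part of the patch):

#include <cstdint>

// 64-bit lowestOneBit from two 32-bit halves, mirroring the RSB/AND(S)
// sequence and the conditional discard above.
static uint64_t LowestOneBit64FromHalves(uint32_t in_lo, uint32_t in_hi) {
  uint32_t out_lo = in_lo & (~in_lo + 1u);  // RSB + ANDS: zero iff in_lo is zero
  uint32_t out_hi = in_hi & (~in_hi + 1u);  // RSB + AND
  if (out_lo != 0u) {                       // the IT(ne)/MOV pair, branch-free above
    out_hi = 0u;
  }
  return (static_cast<uint64_t>(out_hi) << 32) | out_lo;
}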
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ GenLowestOneBit(invoke, Primitive::kPrimInt, codegen_);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
+ CreateLongToLongLocationsWithOverlap(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
+ GenLowestOneBit(invoke, Primitive::kPrimLong, codegen_);
+}
+
void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
LocationSummary* locations = new (arena_) LocationSummary(invoke,
LocationSummary::kNoCall,
@@ -3124,10 +3331,6 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 4cea6dfdfb..2669d97d82 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -22,6 +22,7 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index d785567e0f..74be954a75 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -22,6 +22,7 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
+#include "mirror/object_array-inl.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
index 86fb8e0165..02bc254729 100644
--- a/compiler/optimizing/load_store_analysis.h
+++ b/compiler/optimizing/load_store_analysis.h
@@ -461,28 +461,15 @@ class HeapLocationCollector : public HGraphVisitor {
has_heap_stores_ = true;
}
- void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
- // Any references appearing in the ref_info_array_ so far cannot alias with new_instance.
- CreateReferenceInfoForReferenceType(new_instance);
- }
-
- void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
- CreateReferenceInfoForReferenceType(instruction);
- }
-
- void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE {
- CreateReferenceInfoForReferenceType(instruction);
- }
-
- void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE {
- CreateReferenceInfoForReferenceType(instruction);
- }
-
- void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
- CreateReferenceInfoForReferenceType(instruction);
- }
-
- void VisitSelect(HSelect* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ // Any new-instance or new-array cannot alias with references that
+ // pre-exist it. We append entries into ref_info_array_, which keeps
+ // track of the order of creation of reference values, since we visit
+ // the blocks in reverse post order.
+ //
+ // By default, VisitXXX() (including VisitPhi()) calls VisitInstruction(),
+ // unless VisitXXX() is overridden. VisitInstanceFieldGet() etc. above
+ // also call CreateReferenceInfoForReferenceType() explicitly.
CreateReferenceInfoForReferenceType(instruction);
}
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 0ef7dcdb59..a249cacc93 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -256,6 +256,48 @@ static bool IsAddConst(HInstruction* instruction,
return false;
}
+// Detect reductions of the following forms,
+// under the assumption that the phi has only *one* use:
+// x = x_phi + ..
+// x = x_phi - ..
+// x = max(x_phi, ..)
+// x = min(x_phi, ..)
+static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
+ if (reduction->IsAdd()) {
+ return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi;
+ } else if (reduction->IsSub()) {
+ return reduction->InputAt(0) == phi;
+ } else if (reduction->IsInvokeStaticOrDirect()) {
+ switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) {
+ case Intrinsics::kMathMinIntInt:
+ case Intrinsics::kMathMinLongLong:
+ case Intrinsics::kMathMinFloatFloat:
+ case Intrinsics::kMathMinDoubleDouble:
+ case Intrinsics::kMathMaxIntInt:
+ case Intrinsics::kMathMaxLongLong:
+ case Intrinsics::kMathMaxFloatFloat:
+ case Intrinsics::kMathMaxDoubleDouble:
+ return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi;
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
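In source terms, HasReductionFormat() accepts a loop phi whose single in-loop update has one of the listed shapes. A scalar loop that matches (illustrative):

// A loop whose phi cycle HasReductionFormat() accepts.
static int SumArray(const int* a, int n) {
  int sum = 0;                 // becomes the reduction phi: Phi(0, update)
  for (int i = 0; i < n; ++i) {
    sum = sum + a[i];          // update: IsAdd() with the phi as an operand
  }
  return sum;                  // the phi's only remaining use is after the loop
}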
+// Translates operation to reduction kind.
+static HVecReduce::ReductionKind GetReductionKind(HInstruction* reduction) {
+ if (reduction->IsVecAdd() || reduction->IsVecSub()) {
+ return HVecReduce::kSum;
+ } else if (reduction->IsVecMin()) {
+ return HVecReduce::kMin;
+ } else if (reduction->IsVecMax()) {
+ return HVecReduce::kMax;
+ }
+ LOG(FATAL) << "Unsupported SIMD reduction";
+ UNREACHABLE();
+}
+
// Test vector restrictions.
static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
return (restrictions & tested) != 0;
@@ -280,18 +322,18 @@ static bool CheckInductionSetFullyRemoved(ArenaSet<HInstruction*>* iset) {
return false;
}
}
-
return true;
}
//
-// Class methods.
+// Public methods.
//
HLoopOptimization::HLoopOptimization(HGraph* graph,
CompilerDriver* compiler_driver,
- HInductionVarAnalysis* induction_analysis)
- : HOptimization(graph, kLoopOptimizationPassName),
+ HInductionVarAnalysis* induction_analysis,
+ OptimizingCompilerStats* stats)
+ : HOptimization(graph, kLoopOptimizationPassName, stats),
compiler_driver_(compiler_driver),
induction_range_(induction_analysis),
loop_allocator_(nullptr),
@@ -299,14 +341,15 @@ HLoopOptimization::HLoopOptimization(HGraph* graph,
top_loop_(nullptr),
last_loop_(nullptr),
iset_(nullptr),
- induction_simplication_count_(0),
+ reductions_(nullptr),
simplified_(false),
vector_length_(0),
vector_refs_(nullptr),
vector_peeling_candidate_(nullptr),
vector_runtime_test_a_(nullptr),
vector_runtime_test_b_(nullptr),
- vector_map_(nullptr) {
+ vector_map_(nullptr),
+ vector_permanent_map_(nullptr) {
}
void HLoopOptimization::Run() {
@@ -333,6 +376,10 @@ void HLoopOptimization::Run() {
last_loop_ = top_loop_ = nullptr;
}
+//
+// Loop setup and traversal.
+//
+
void HLoopOptimization::LocalRun() {
// Build the linear order using the phase-local allocator. This step enables building
// a loop hierarchy that properly reflects the outer-inner and previous-next relation.
@@ -351,19 +398,27 @@ void HLoopOptimization::LocalRun() {
// should use the global allocator.
if (top_loop_ != nullptr) {
ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ ArenaSafeMap<HInstruction*, HInstruction*> reds(
+ std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
ArenaSafeMap<HInstruction*, HInstruction*> map(
std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ ArenaSafeMap<HInstruction*, HInstruction*> perm(
+ std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
// Attach.
iset_ = &iset;
+ reductions_ = &reds;
vector_refs_ = &refs;
vector_map_ = &map;
+ vector_permanent_map_ = &perm;
// Traverse.
TraverseLoopsInnerToOuter(top_loop_);
// Detach.
iset_ = nullptr;
+ reductions_ = nullptr;
vector_refs_ = nullptr;
vector_map_ = nullptr;
+ vector_permanent_map_ = nullptr;
}
}
@@ -414,16 +469,12 @@ void HLoopOptimization::RemoveLoop(LoopNode* node) {
}
}
-void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
+bool HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
+ bool changed = false;
for ( ; node != nullptr; node = node->next) {
- // Visit inner loops first.
- uint32_t current_induction_simplification_count = induction_simplication_count_;
- if (node->inner != nullptr) {
- TraverseLoopsInnerToOuter(node->inner);
- }
- // Recompute induction information of this loop if the induction
- // of any inner loop has been simplified.
- if (current_induction_simplification_count != induction_simplication_count_) {
+ // Visit inner loops first. Recompute induction information for this
+ // loop if the induction of any inner loop has changed.
+ if (TraverseLoopsInnerToOuter(node->inner)) {
induction_range_.ReVisit(node->loop_info);
}
// Repeat simplifications in the loop-body until no more changes occur.
@@ -433,12 +484,14 @@ void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
simplified_ = false;
SimplifyInduction(node);
SimplifyBlocks(node);
+ changed = simplified_ || changed;
} while (simplified_);
// Optimize inner loop.
if (node->inner == nullptr) {
- OptimizeInnerLoop(node);
+ changed = OptimizeInnerLoop(node) || changed;
}
}
+ return changed;
}
//
@@ -455,20 +508,18 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) {
// for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->AsPhi();
- iset_->clear(); // prepare phi induction
if (TrySetPhiInduction(phi, /*restrict_uses*/ true) &&
TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) {
// Note that it's ok to have replaced uses after the loop with the last value, without
// being able to remove the cycle. Environment uses (which are the reason we may not be
- // able to remove the cycle) within the loop will still hold the right value.
+ // able to remove the cycle) within the loop will still hold the right value. We must
+ // have tried first, however, to replace outside uses.
if (CanRemoveCycle()) {
+ simplified_ = true;
for (HInstruction* i : *iset_) {
RemoveFromCycle(i);
}
-
- // Check that there are no records of the deleted instructions.
DCHECK(CheckInductionSetFullyRemoved(iset_));
- simplified_ = true;
}
}
}
@@ -511,21 +562,20 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
}
}
-void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
+bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
// Ensure loop header logic is finite.
int64_t trip_count = 0;
if (!induction_range_.IsFinite(node->loop_info, &trip_count)) {
- return;
+ return false;
}
-
// Ensure there is only a single loop-body (besides the header).
HBasicBlock* body = nullptr;
for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
if (it.Current() != header) {
if (body != nullptr) {
- return;
+ return false;
}
body = it.Current();
}
@@ -533,27 +583,27 @@ void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
CHECK(body != nullptr);
// Ensure there is only a single exit point.
if (header->GetSuccessors().size() != 2) {
- return;
+ return false;
}
HBasicBlock* exit = (header->GetSuccessors()[0] == body)
? header->GetSuccessors()[1]
: header->GetSuccessors()[0];
// Ensure exit can only be reached by exiting loop.
if (exit->GetPredecessors().size() != 1) {
- return;
+ return false;
}
// Detect either an empty loop (no side effects other than plain iteration) or
// a trivial loop (just iterating once). Replace subsequent index uses, if any,
// with the last value and remove the loop, possibly after unrolling its body.
- HInstruction* phi = header->GetFirstPhi();
- iset_->clear(); // prepare phi induction
- if (TrySetSimpleLoopHeader(header)) {
+ HPhi* main_phi = nullptr;
+ if (TrySetSimpleLoopHeader(header, &main_phi)) {
bool is_empty = IsEmptyBody(body);
- if ((is_empty || trip_count == 1) &&
- TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
+ if (reductions_->empty() && // TODO: possible with some effort
+ (is_empty || trip_count == 1) &&
+ TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
if (!is_empty) {
// Unroll the loop-body, which sees initial value of the index.
- phi->ReplaceWith(phi->InputAt(0));
+ main_phi->ReplaceWith(main_phi->InputAt(0));
preheader->MergeInstructionsWith(body);
}
body->DisconnectAndDelete();
@@ -566,21 +616,20 @@ void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
preheader->AddDominatedBlock(exit);
exit->SetDominator(preheader);
RemoveLoop(node); // update hierarchy
- return;
+ return true;
}
}
-
// Vectorize loop, if possible and valid.
- if (kEnableVectorization) {
- iset_->clear(); // prepare phi induction
- if (TrySetSimpleLoopHeader(header) &&
- ShouldVectorize(node, body, trip_count) &&
- TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
- Vectorize(node, body, exit, trip_count);
- graph_->SetHasSIMD(true); // flag SIMD usage
- return;
- }
+ if (kEnableVectorization &&
+ TrySetSimpleLoopHeader(header, &main_phi) &&
+ ShouldVectorize(node, body, trip_count) &&
+ TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) {
+ Vectorize(node, body, exit, trip_count);
+ graph_->SetHasSIMD(true); // flag SIMD usage
+ MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized);
+ return true;
}
+ return false;
}
//
@@ -621,6 +670,8 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6
// aliased, as well as the property that references either point to the same
// array or to two completely disjoint arrays, i.e., no partial aliasing.
// Other than a few simple heuristics, no detailed subscript analysis is done.
+ // The scan over references also finds a suitable dynamic loop peeling candidate.
+ const ArrayReference* candidate = nullptr;
for (auto i = vector_refs_->begin(); i != vector_refs_->end(); ++i) {
for (auto j = i; ++j != vector_refs_->end(); ) {
if (i->type == j->type && (i->lhs || j->lhs)) {
@@ -656,7 +707,7 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6
}
// Consider dynamic loop peeling for alignment.
- SetPeelingCandidate(trip_count);
+ SetPeelingCandidate(candidate, trip_count);
// Success!
return true;
@@ -679,14 +730,15 @@ void HLoopOptimization::Vectorize(LoopNode* node,
bool needs_cleanup = trip_count == 0 || (trip_count % chunk) != 0;
// Adjust vector bookkeeping.
- iset_->clear(); // prepare phi induction
- bool is_simple_loop_header = TrySetSimpleLoopHeader(header); // fills iset_
+ HPhi* main_phi = nullptr;
+ bool is_simple_loop_header = TrySetSimpleLoopHeader(header, &main_phi); // refills sets
DCHECK(is_simple_loop_header);
vector_header_ = header;
vector_body_ = block;
- // Generate dynamic loop peeling trip count, if needed:
- // ptc = <peeling-needed-for-candidate>
+ // Generate dynamic loop peeling trip count, if needed, under the assumption
+ // that the Android runtime guarantees at least "component size" alignment:
+ // ptc = (ALIGN - (&a[initial] % ALIGN)) / type-size
HInstruction* ptc = nullptr;
if (vector_peeling_candidate_ != nullptr) {
DCHECK_LT(vector_length_, trip_count) << "dynamic peeling currently requires known trip count";
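As a worked instance of the formula above, assuming ALIGN is 16 bytes: for an int array (4-byte elements) whose &a[initial] sits 4 bytes past a 16-byte boundary, ptc = (16 - 4) / 4 = 3, so peeling three scalar iterations aligns the remaining vector accesses.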
@@ -769,12 +821,20 @@ void HLoopOptimization::Vectorize(LoopNode* node,
/*unroll*/ 1);
}
+ // Link reductions to their final uses.
+ for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
+ if (i->first->IsPhi()) {
+ i->first->ReplaceWith(ReduceAndExtractIfNeeded(i->second));
+ }
+ }
+
// Remove the original loop by disconnecting the body block
// and removing all instructions from the header.
block->DisconnectAndDelete();
while (!header->GetFirstInstruction()->IsGoto()) {
header->RemoveInstruction(header->GetFirstInstruction());
}
+
// Update loop hierarchy: the old header now resides in the same outer loop
// as the old preheader. Note that we don't bother putting sequential
// loops back in the hierarchy at this point.
@@ -807,21 +867,10 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node,
vector_header_->AddInstruction(cond);
vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
vector_index_ = phi;
+ vector_permanent_map_->clear(); // preserved over unrolling
for (uint32_t u = 0; u < unroll; u++) {
- // Clear map, leaving loop invariants setup during unrolling.
- if (u == 0) {
- vector_map_->clear();
- } else {
- for (auto i = vector_map_->begin(); i != vector_map_->end(); ) {
- if (i->second->IsVecReplicateScalar()) {
- DCHECK(node->loop_info->IsDefinedOutOfTheLoop(i->first));
- ++i;
- } else {
- i = vector_map_->erase(i);
- }
- }
- }
// Generate instruction map.
+ vector_map_->clear();
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
DCHECK(vectorized_def);
@@ -838,16 +887,23 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node,
}
}
}
+ // Generate the induction.
vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step);
Insert(vector_body_, vector_index_);
}
- // Finalize phi for the loop index.
+ // Finalize phi inputs for the reductions (if any).
+ for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
+ if (!i->first->IsPhi()) {
+ DCHECK(i->second->IsPhi());
+ GenerateVecReductionPhiInputs(i->second->AsPhi(), i->first);
+ }
+ }
+ // Finalize phi inputs for the loop index.
phi->AddInput(lo);
phi->AddInput(vector_index_);
vector_index_ = phi;
}
-// TODO: accept reductions at left-hand-side, mixed-type store idioms, etc.
bool HLoopOptimization::VectorizeDef(LoopNode* node,
HInstruction* instruction,
bool generate_code) {
@@ -877,6 +933,23 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
}
return false;
}
+ // Accept a left-hand-side reduction for
+ // (1) supported vector type,
+ // (2) vectorizable right-hand-side value.
+ auto redit = reductions_->find(instruction);
+ if (redit != reductions_->end()) {
+ Primitive::Type type = instruction->GetType();
+ if (TrySetVectorType(type, &restrictions) &&
+ VectorizeUse(node, instruction, generate_code, type, restrictions)) {
+ if (generate_code) {
+ HInstruction* new_red = vector_map_->Get(instruction);
+ vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second));
+ vector_permanent_map_->Overwrite(redit->second, new_red);
+ }
+ return true;
+ }
+ return false;
+ }
// Branch back okay.
if (instruction->IsGoto()) {
return true;
@@ -932,6 +1005,21 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
}
return true;
}
+ } else if (instruction->IsPhi()) {
+ // Accept particular phi operations.
+ if (reductions_->find(instruction) != reductions_->end()) {
+ // Deal with vector restrictions.
+ if (HasVectorRestrictions(restrictions, kNoReduction)) {
+ return false;
+ }
+ // Accept a reduction.
+ if (generate_code) {
+ GenerateVecReductionPhi(instruction->AsPhi());
+ }
+ return true;
+ }
+ // TODO: accept right-hand-side induction?
+ return false;
} else if (instruction->IsTypeConversion()) {
// Accept particular type conversions.
HTypeConversion* conversion = instruction->AsTypeConversion();
@@ -1118,18 +1206,18 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
case kArm:
case kThumb2:
// Allow vectorization for all ARM devices, because Android assumes that
- // ARM 32-bit always supports advanced SIMD.
+ // ARM 32-bit always supports advanced SIMD (64-bit SIMD).
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(8);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt;
+ *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
return TrySetVectorLength(4);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(2);
default:
break;
@@ -1137,15 +1225,15 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
return false;
case kArm64:
// Allow vectorization for all ARM devices, because Android assumes that
- // ARMv8 AArch64 always supports advanced SIMD.
+ // ARMv8 AArch64 always supports advanced SIMD (128-bit SIMD).
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1154,24 +1242,27 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
*restrictions |= kNoDiv | kNoMul | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
+ *restrictions |= kNoReduction;
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
+ *restrictions |= kNoReduction;
return TrySetVectorLength(2);
default:
return false;
}
case kX86:
case kX86_64:
- // Allow vectorization for SSE4-enabled X86 devices only (128-bit vectors).
+ // Allow vectorization for SSE4.1-enabled X86 devices only (128-bit SIMD).
if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) {
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
+ *restrictions |=
+ kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd;
+ *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1180,10 +1271,10 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
*restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax; // -0.0 vs +0.0
+ *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax; // -0.0 vs +0.0
+ *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
return TrySetVectorLength(2);
default:
break;
@@ -1195,23 +1286,23 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt;
+ *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax; // min/max(x, NaN)
+ *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax; // min/max(x, NaN)
+ *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
return TrySetVectorLength(2);
default:
break;
@@ -1223,23 +1314,23 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt;
+ *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoReduction;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
- *restrictions |= kNoMinMax; // min/max(x, NaN)
+ *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
return TrySetVectorLength(4);
case Primitive::kPrimDouble:
- *restrictions |= kNoMinMax; // min/max(x, NaN)
+ *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
return TrySetVectorLength(2);
default:
break;
@@ -1272,9 +1363,16 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type)
return;
}
// In vector code, explicit scalar expansion is needed.
- HInstruction* vector = new (global_allocator_) HVecReplicateScalar(
- global_allocator_, org, type, vector_length_);
- vector_map_->Put(org, Insert(vector_preheader_, vector));
+ HInstruction* vector = nullptr;
+ auto it = vector_permanent_map_->find(org);
+ if (it != vector_permanent_map_->end()) {
+ vector = it->second; // reuse during unrolling
+ } else {
+ vector = new (global_allocator_) HVecReplicateScalar(
+ global_allocator_, org, type, vector_length_);
+ vector_permanent_map_->Put(org, Insert(vector_preheader_, vector));
+ }
+ vector_map_->Put(org, vector);
}
}
@@ -1310,8 +1408,6 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org,
global_allocator_, base, opa, type, vector_length_, is_string_char_at);
}
// Known dynamically enforced alignment?
- // TODO: detect offset + constant differences.
- // TODO: long run, static alignment analysis?
if (vector_peeling_candidate_ != nullptr &&
vector_peeling_candidate_->base == base &&
vector_peeling_candidate_->offset == offset) {
@@ -1331,6 +1427,78 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org,
vector_map_->Put(org, vector);
}
+void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) {
+ DCHECK(reductions_->find(phi) != reductions_->end());
+ DCHECK(reductions_->Get(phi->InputAt(1)) == phi);
+ HInstruction* vector = nullptr;
+ if (vector_mode_ == kSequential) {
+ HPhi* new_phi = new (global_allocator_) HPhi(
+ global_allocator_, kNoRegNumber, 0, phi->GetType());
+ vector_header_->AddPhi(new_phi);
+ vector = new_phi;
+ } else {
+ // Link the vector reduction back to the prior unrolled update, or create a first phi.
+ auto it = vector_permanent_map_->find(phi);
+ if (it != vector_permanent_map_->end()) {
+ vector = it->second;
+ } else {
+ HPhi* new_phi = new (global_allocator_) HPhi(
+ global_allocator_, kNoRegNumber, 0, HVecOperation::kSIMDType);
+ vector_header_->AddPhi(new_phi);
+ vector = new_phi;
+ }
+ }
+ vector_map_->Put(phi, vector);
+}
+
+void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) {
+ HInstruction* new_phi = vector_map_->Get(phi);
+ HInstruction* new_init = reductions_->Get(phi);
+ HInstruction* new_red = vector_map_->Get(reduction);
+ // Link unrolled vector loop back to new phi.
+ for (; !new_phi->IsPhi(); new_phi = vector_permanent_map_->Get(new_phi)) {
+ DCHECK(new_phi->IsVecOperation());
+ }
+ // Prepare the new initialization.
+ if (vector_mode_ == kVector) {
+ // Generate a [initial, 0, .., 0] vector.
+ new_init = Insert(
+ vector_preheader_,
+ new (global_allocator_) HVecSetScalars(
+ global_allocator_, &new_init, phi->GetType(), vector_length_, 1));
+ } else {
+ new_init = ReduceAndExtractIfNeeded(new_init);
+ }
+ // Set the phi inputs.
+ DCHECK(new_phi->IsPhi());
+ new_phi->AsPhi()->AddInput(new_init);
+ new_phi->AsPhi()->AddInput(new_red);
+ // New feed value for next phi (safe mutation in iteration).
+ reductions_->find(phi)->second = new_phi;
+}
+
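For the sum reductions enabled by this change, the [initial, 0, .., 0] initialization is exact: the extra lanes hold the additive identity, so the cross-lane reduction at the loop exit yields the initial value plus all vectorized contributions.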
+HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruction) {
+ if (instruction->IsPhi()) {
+ HInstruction* input = instruction->InputAt(1);
+ if (input->IsVecOperation()) {
+ Primitive::Type type = input->AsVecOperation()->GetPackedType();
+ HBasicBlock* exit = instruction->GetBlock()->GetSuccessors()[0];
+ // Generate a vector reduction and scalar extract
+ // x = REDUCE( [x_1, .., x_n] )
+ // y = x_1
+ // along the exit of the defining loop.
+ HVecReduce::ReductionKind kind = GetReductionKind(input);
+ HInstruction* reduce = new (global_allocator_) HVecReduce(
+ global_allocator_, instruction, type, vector_length_, kind);
+ exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction());
+ instruction = new (global_allocator_) HVecExtractScalar(
+ global_allocator_, reduce, type, vector_length_, 0);
+ exit->InsertInstructionAfter(instruction, reduce);
+ }
+ }
+ return instruction;
+}
+
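The REDUCE-plus-extract pair emitted above has a simple scalar reading: fold all lanes of the reduction phi's final vector value, then read out the folded lane. For the kSum case (sketch, not part of the patch):

#include <cstddef>

// Scalar equivalent of HVecReduce(kSum) followed by HVecExtractScalar.
static int ReduceSumLanes(const int* lanes, size_t vector_length) {
  int acc = 0;
  for (size_t i = 0; i < vector_length; ++i) {
    acc += lanes[i];  // fold every lane; the extract then reads the result
  }
  return acc;
}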
#define GENERATE_VEC(x, y) \
if (vector_mode_ == kVector) { \
vector = (x); \
@@ -1511,10 +1679,9 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
// Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
// (note whether the sign bit in wider precision is shifted in has no effect
// on the narrow precision computed by the idiom).
- int64_t distance = 0;
if ((instruction->IsShr() ||
instruction->IsUShr()) &&
- IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) {
+ IsInt64Value(instruction->InputAt(1), 1)) {
// Test for (a + b + c) >> 1 for optional constant c.
HInstruction* a = nullptr;
HInstruction* b = nullptr;
@@ -1559,6 +1726,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
vector_length_,
is_unsigned,
is_rounded));
+ MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
} else {
GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type);
}
@@ -1586,9 +1754,11 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) {
return true;
}
-void HLoopOptimization::SetPeelingCandidate(int64_t trip_count ATTRIBUTE_UNUSED) {
+void HLoopOptimization::SetPeelingCandidate(const ArrayReference* candidate,
+ int64_t trip_count ATTRIBUTE_UNUSED) {
// Current heuristic: none.
// TODO: implement
+ vector_peeling_candidate_ = candidate;
}
uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) {
@@ -1616,13 +1786,17 @@ uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_
//
bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) {
+ // Start with empty phi induction.
+ iset_->clear();
+
// Special case Phis that have equivalent in a debuggable setup. Our graph checker isn't
// smart enough to follow strongly connected components (and it's probably not worth
// it to make it so). See b/33775412.
if (graph_->IsDebuggable() && phi->HasEquivalentPhi()) {
return false;
}
- DCHECK(iset_->empty());
+
+ // Lookup phi induction cycle.
ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
if (set != nullptr) {
for (HInstruction* i : *set) {
@@ -1634,6 +1808,7 @@ bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) {
} else if (!i->IsRemovable()) {
return false;
} else if (i != phi && restrict_uses) {
+ // Deal with regular uses.
for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
if (set->find(use.GetUser()) == set->end()) {
return false;
@@ -1647,17 +1822,65 @@ bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) {
return false;
}
-// Find: phi: Phi(init, addsub)
-// s: SuspendCheck
-// c: Condition(phi, bound)
-// i: If(c)
-// TODO: Find a less pattern matching approach?
-bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) {
+bool HLoopOptimization::TrySetPhiReduction(HPhi* phi) {
DCHECK(iset_->empty());
- HInstruction* phi = block->GetFirstPhi();
- if (phi != nullptr &&
- phi->GetNext() == nullptr &&
- TrySetPhiInduction(phi->AsPhi(), /*restrict_uses*/ false)) {
+ // Only unclassified phi cycles are candidates for reductions.
+ if (induction_range_.IsClassified(phi)) {
+ return false;
+ }
+ // Accept operations like x = x + .., provided that the phi and the reduction are
+ // used exactly once inside the loop, and by each other.
+ HInputsRef inputs = phi->GetInputs();
+ if (inputs.size() == 2) {
+ HInstruction* reduction = inputs[1];
+ if (HasReductionFormat(reduction, phi)) {
+ HLoopInformation* loop_info = phi->GetBlock()->GetLoopInformation();
+ int32_t use_count = 0;
+ bool single_use_inside_loop =
+ // Reduction update only used by phi.
+ reduction->GetUses().HasExactlyOneElement() &&
+ !reduction->HasEnvironmentUses() &&
+ // Reduction update is only use of phi inside the loop.
+ IsOnlyUsedAfterLoop(loop_info, phi, /*collect_loop_uses*/ true, &use_count) &&
+ iset_->size() == 1;
+ iset_->clear(); // leave it the way you found it
+ if (single_use_inside_loop) {
+ // Link reduction back, and start recording feed value.
+ reductions_->Put(reduction, phi);
+ reductions_->Put(phi, phi->InputAt(0));
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
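The shape accepted by TrySetPhiReduction corresponds to a plain scalar reduction; a sketch, with the use constraints spelled out:

    int sum = init;            // phi(init, update)
    for (int i = 0; i < n; i++) {
      sum = sum + a[i];        // update: its only use is the phi;
    }                          // the phi's only in-loop use is the update
    use(sum);                  // any other uses lie after the loop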
+bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi) {
+ // Start with empty phi induction and reductions.
+ iset_->clear();
+ reductions_->clear();
+
+ // Scan the phis to find the following (the induction structure has already
+ // been optimized, so we don't need to worry about trivial cases):
+ // (1) optional reductions in loop,
+ // (2) the main induction, used in loop control.
+ HPhi* phi = nullptr;
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ if (TrySetPhiReduction(it.Current()->AsPhi())) {
+ continue;
+ } else if (phi == nullptr) {
+ // Found the first candidate for main induction.
+ phi = it.Current()->AsPhi();
+ } else {
+ return false;
+ }
+ }
+
+ // Then test for a typical loop header:
+ // s: SuspendCheck
+ // c: Condition(phi, bound)
+ // i: If(c)
+ if (phi != nullptr && TrySetPhiInduction(phi, /*restrict_uses*/ false)) {
HInstruction* s = block->GetFirstInstruction();
if (s != nullptr && s->IsSuspendCheck()) {
HInstruction* c = s->GetNext();
@@ -1669,6 +1892,7 @@ bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) {
if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
iset_->insert(c);
iset_->insert(s);
+ *main_phi = phi;
return true;
}
}
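Put together, the header being recognized here has the canonical counted-loop shape (sketch):

    // header:
    //   phi = Phi(init, update)      // main induction (reduction phis handled above)
    //   s   = SuspendCheck
    //   c   = Condition(phi, bound)
    //   i   = If(c)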
@@ -1692,6 +1916,7 @@ bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) {
bool HLoopOptimization::IsUsedOutsideLoop(HLoopInformation* loop_info,
HInstruction* instruction) {
+ // Deal with regular uses.
for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
if (use.GetUser()->GetBlock()->GetLoopInformation() != loop_info) {
return true;
@@ -1704,6 +1929,7 @@ bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
HInstruction* instruction,
bool collect_loop_uses,
/*out*/ int32_t* use_count) {
+ // Deal with regular uses.
for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
HInstruction* user = use.GetUser();
if (iset_->find(user) == iset_->end()) { // not excluded?
@@ -1729,6 +1955,7 @@ bool HLoopOptimization::TryReplaceWithLastValue(HLoopInformation* loop_info,
// Try to replace outside uses with the last value.
if (induction_range_.CanGenerateLastValue(instruction)) {
HInstruction* replacement = induction_range_.GenerateLastValue(instruction, graph_, block);
+ // Deal with regular uses.
const HUseList<HInstruction*>& uses = instruction->GetUses();
for (auto it = uses.begin(), end = uses.end(); it != end;) {
HInstruction* user = it->GetUser();
@@ -1744,6 +1971,7 @@ bool HLoopOptimization::TryReplaceWithLastValue(HLoopInformation* loop_info,
induction_range_.Replace(user, instruction, replacement); // update induction
}
}
+ // Deal with environment uses.
const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) {
HEnvironment* user = it->GetUser();
@@ -1759,7 +1987,6 @@ bool HLoopOptimization::TryReplaceWithLastValue(HLoopInformation* loop_info,
}
}
}
- induction_simplication_count_++;
return true;
}
return false;
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index de4bd85fc8..f34751815b 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -34,7 +34,8 @@ class HLoopOptimization : public HOptimization {
public:
HLoopOptimization(HGraph* graph,
CompilerDriver* compiler_driver,
- HInductionVarAnalysis* induction_analysis);
+ HInductionVarAnalysis* induction_analysis,
+ OptimizingCompilerStats* stats);
void Run() OVERRIDE;
@@ -62,17 +63,18 @@ class HLoopOptimization : public HOptimization {
* Vectorization restrictions (bit mask).
*/
enum VectorRestrictions {
- kNone = 0, // no restrictions
- kNoMul = 1, // no multiplication
- kNoDiv = 2, // no division
- kNoShift = 4, // no shift
- kNoShr = 8, // no arithmetic shift right
- kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
- kNoSignedHAdd = 32, // no signed halving add
- kNoUnroundedHAdd = 64, // no unrounded halving add
- kNoAbs = 128, // no absolute value
- kNoMinMax = 256, // no min/max
- kNoStringCharAt = 512, // no StringCharAt
+ kNone = 0, // no restrictions
+ kNoMul = 1 << 0, // no multiplication
+ kNoDiv = 1 << 1, // no division
+ kNoShift = 1 << 2, // no shift
+ kNoShr = 1 << 3, // no arithmetic shift right
+ kNoHiBits = 1 << 4, // "wider" operations cannot bring in higher order bits
+ kNoSignedHAdd = 1 << 5, // no signed halving add
+ kNoUnroundedHAdd = 1 << 6, // no unrounded halving add
+ kNoAbs = 1 << 7, // no absolute value
+ kNoMinMax = 1 << 8, // no min/max
+ kNoStringCharAt = 1 << 9, // no StringCharAt
+ kNoReduction = 1 << 10, // no reduction
};
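A minimal sketch of how such a mask is consumed (assumed usage; the actual helper in the .cc file may differ):

    uint64_t restrictions = kNoDiv | kNoReduction;  // filled in per target/type
    if ((restrictions & kNoReduction) != 0) {
      return false;  // this target/type combination cannot vectorize reductions
    }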
/*
@@ -104,18 +106,33 @@ class HLoopOptimization : public HOptimization {
bool lhs; // def/use
};
+ //
// Loop setup and traversal.
+ //
+
void LocalRun();
void AddLoop(HLoopInformation* loop_info);
void RemoveLoop(LoopNode* node);
- void TraverseLoopsInnerToOuter(LoopNode* node);
+ // Traverses all loops inner to outer to perform simplifications and optimizations.
+ // Returns true if loops nested inside the current loop (node) have changed.
+ bool TraverseLoopsInnerToOuter(LoopNode* node);
+
+ //
// Optimization.
+ //
+
void SimplifyInduction(LoopNode* node);
void SimplifyBlocks(LoopNode* node);
- void OptimizeInnerLoop(LoopNode* node);
+ // Performs optimizations specific to an inner loop (empty loop removal,
+ // unrolling, vectorization). Returns true if anything changed.
+ bool OptimizeInnerLoop(LoopNode* node);
+
+ //
// Vectorization analysis and synthesis.
+ //
+
bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
void GenerateNewLoop(LoopNode* node,
@@ -140,6 +157,9 @@ class HLoopOptimization : public HOptimization {
HInstruction* opb,
HInstruction* offset,
Primitive::Type type);
+ void GenerateVecReductionPhi(HPhi* phi);
+ void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction);
+ HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction);
void GenerateVecOp(HInstruction* org,
HInstruction* opa,
HInstruction* opb,
@@ -155,12 +175,20 @@ class HLoopOptimization : public HOptimization {
// Vectorization heuristics.
bool IsVectorizationProfitable(int64_t trip_count);
- void SetPeelingCandidate(int64_t trip_count);
+ void SetPeelingCandidate(const ArrayReference* candidate, int64_t trip_count);
uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
+ //
// Helpers.
+ //
+
bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
- bool TrySetSimpleLoopHeader(HBasicBlock* block);
+ bool TrySetPhiReduction(HPhi* phi);
+
+ // Detects a loop header with a single induction (returned in main_phi), possibly
+ // other phis for reductions, but no other side effects. Returns true on success.
+ bool TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi);
+
bool IsEmptyBody(HBasicBlock* block);
bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
HInstruction* instruction,
@@ -200,10 +228,12 @@ class HLoopOptimization : public HOptimization {
// Contents reside in phase-local heap memory.
ArenaSet<HInstruction*>* iset_;
- // Counter that tracks how many induction cycles have been simplified. Useful
- // to trigger incremental updates of induction variable analysis of outer loops
- // when the induction of inner loops has changed.
- uint32_t induction_simplication_count_;
+ // Temporary bookkeeping of reduction instructions. Mapping is two-fold:
+ // (1) reductions in the loop-body are mapped back to their phi definition,
+ // (2) phi definitions are mapped to their initial value (updated during
+ // code generation to feed the proper values into the new chain).
+ // Contents reside in phase-local heap memory.
+ ArenaSafeMap<HInstruction*, HInstruction*>* reductions_;
// Flag that tracks if any simplifications have occurred.
bool simplified_;
@@ -228,6 +258,10 @@ class HLoopOptimization : public HOptimization {
// Contents reside in phase-local heap memory.
ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
+ // Permanent mapping used during vectorization synthesis.
+ // Contents reside in phase-local heap memory.
+ ArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_;
+
// Temporary vectorization bookkeeping.
VectorMode vector_mode_; // synthesis mode
HBasicBlock* vector_preheader_; // preheader of the new loop
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index 5b9350689e..1c5603d00f 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -31,7 +31,7 @@ class LoopOptimizationTest : public CommonCompilerTest {
allocator_(&pool_),
graph_(CreateGraph(&allocator_)),
iva_(new (&allocator_) HInductionVarAnalysis(graph_)),
- loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_)) {
+ loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_, nullptr)) {
BuildGraph();
}
@@ -195,4 +195,44 @@ TEST_F(LoopOptimizationTest, LoopNestWithSequence) {
EXPECT_EQ("[[[[[[[[[[][][][][][][][][][]]]]]]]]]]", LoopStructure());
}
+// Check that SimplifyLoop() doesn't invalidate data flow when ordering loop headers'
+// predecessors.
+TEST_F(LoopOptimizationTest, SimplifyLoop) {
+ // Can't use AddLoop, as we want a special order for the block's predecessors.
+ HBasicBlock* header = new (&allocator_) HBasicBlock(graph_);
+ HBasicBlock* body = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(header);
+ graph_->AddBlock(body);
+
+ // Control flow: make a loop back edge first in the list of predecessors.
+ entry_block_->RemoveSuccessor(return_block_);
+ body->AddSuccessor(header);
+ entry_block_->AddSuccessor(header);
+ header->AddSuccessor(body);
+ header->AddSuccessor(return_block_);
+ DCHECK(header->GetSuccessors()[1] == return_block_);
+
+ // Data flow.
+ header->AddInstruction(new (&allocator_) HIf(parameter_));
+ body->AddInstruction(new (&allocator_) HGoto());
+
+ HPhi* phi = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt);
+ HInstruction* add = new (&allocator_) HAdd(Primitive::kPrimInt, phi, parameter_);
+ header->AddPhi(phi);
+ body->AddInstruction(add);
+
+ phi->AddInput(add);
+ phi->AddInput(parameter_);
+
+ graph_->ClearLoopInformation();
+ graph_->ClearDominanceInformation();
+ graph_->BuildDominatorTree();
+
+ // Check that, after the optimizations in BuildDominatorTree()/SimplifyCFG(), phi inputs
+ // are still mapped correctly to the block predecessors.
+ for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
+ HInstruction* input = phi->InputAt(i);
+ ASSERT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i]));
+ }
+}
} // namespace art
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index e34d4a2be6..217a8f29a8 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -358,6 +358,35 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) {
}
}
+// Reorder phi inputs to match the reordering of the block's predecessors.
+static void FixPhisAfterPredecessorsReordering(HBasicBlock* block, size_t first, size_t second) {
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* phi = it.Current()->AsPhi();
+ HInstruction* first_instr = phi->InputAt(first);
+ HInstruction* second_instr = phi->InputAt(second);
+ phi->ReplaceInput(first_instr, second);
+ phi->ReplaceInput(second_instr, first);
+ }
+}
+
+// Make sure that the first predecessor of a loop header is the incoming block.
+void HGraph::OrderLoopHeaderPredecessors(HBasicBlock* header) {
+ DCHECK(header->IsLoopHeader());
+ HLoopInformation* info = header->GetLoopInformation();
+ if (info->IsBackEdge(*header->GetPredecessors()[0])) {
+ HBasicBlock* to_swap = header->GetPredecessors()[0];
+ for (size_t pred = 1, e = header->GetPredecessors().size(); pred < e; ++pred) {
+ HBasicBlock* predecessor = header->GetPredecessors()[pred];
+ if (!info->IsBackEdge(*predecessor)) {
+ header->predecessors_[pred] = to_swap;
+ header->predecessors_[0] = predecessor;
+ FixPhisAfterPredecessorsReordering(header, 0, pred);
+ break;
+ }
+ }
+ }
+}
+
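A worked example of the swap, assuming the back edge was listed first:

    // Before: predecessors = [ body (back edge), entry ],  phi(update, init)
    // After:  predecessors = [ entry, body ],              phi(init, update)
    // FixPhisAfterPredecessorsReordering swaps phi inputs 0 and `pred` in lockstep
    // with the predecessor swap, so input i stays matched to predecessor i.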
void HGraph::SimplifyLoop(HBasicBlock* header) {
HLoopInformation* info = header->GetLoopInformation();
@@ -381,18 +410,7 @@ void HGraph::SimplifyLoop(HBasicBlock* header) {
pre_header->AddSuccessor(header);
}
- // Make sure the first predecessor of a loop header is the incoming block.
- if (info->IsBackEdge(*header->GetPredecessors()[0])) {
- HBasicBlock* to_swap = header->GetPredecessors()[0];
- for (size_t pred = 1, e = header->GetPredecessors().size(); pred < e; ++pred) {
- HBasicBlock* predecessor = header->GetPredecessors()[pred];
- if (!info->IsBackEdge(*predecessor)) {
- header->predecessors_[pred] = to_swap;
- header->predecessors_[0] = predecessor;
- break;
- }
- }
- }
+ OrderLoopHeaderPredecessors(header);
HInstruction* first_instruction = header->GetFirstInstruction();
if (first_instruction != nullptr && first_instruction->IsSuspendCheck()) {
@@ -1774,6 +1792,10 @@ bool HBasicBlock::IsSingleGoto() const {
return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsGoto();
}
+bool HBasicBlock::IsSingleReturn() const {
+ return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsReturn();
+}
+
bool HBasicBlock::IsSingleTryBoundary() const {
return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsTryBoundary();
}
@@ -2810,6 +2832,7 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const {
}
switch (GetLoadKind()) {
case LoadKind::kBootImageAddress:
+ case LoadKind::kBootImageInternTable:
case LoadKind::kJitTableAddress: {
ScopedObjectAccess soa(Thread::Current());
return GetString().Get() == other_load_string->GetString().Get();
@@ -2840,6 +2863,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) {
return os << "BootImageLinkTimePcRelative";
case HLoadString::LoadKind::kBootImageAddress:
return os << "BootImageAddress";
+ case HLoadString::LoadKind::kBootImageInternTable:
+ return os << "BootImageInternTable";
case HLoadString::LoadKind::kBssEntry:
return os << "BssEntry";
case HLoadString::LoadKind::kJitTableAddress:
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 3e4928bd65..6bf53f7147 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -418,6 +418,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
HBasicBlock* SplitEdge(HBasicBlock* block, HBasicBlock* successor);
void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor);
+ void OrderLoopHeaderPredecessors(HBasicBlock* header);
void SimplifyLoop(HBasicBlock* header);
int32_t GetNextInstructionId() {
@@ -958,6 +959,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
}
bool IsSingleGoto() const;
+ bool IsSingleReturn() const;
bool IsSingleTryBoundary() const;
// Returns true if this block emits nothing but a jump.
@@ -1372,7 +1374,8 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(UShr, BinaryOperation) \
M(Xor, BinaryOperation) \
M(VecReplicateScalar, VecUnaryOperation) \
- M(VecSumReduce, VecUnaryOperation) \
+ M(VecExtractScalar, VecUnaryOperation) \
+ M(VecReduce, VecUnaryOperation) \
M(VecCnv, VecUnaryOperation) \
M(VecNeg, VecUnaryOperation) \
M(VecAbs, VecUnaryOperation) \
@@ -5869,6 +5872,10 @@ class HLoadString FINAL : public HInstruction {
// Used for boot image strings referenced by apps in AOT- and JIT-compiled code.
kBootImageAddress,
+ // Use a PC-relative load from a boot image InternTable mmapped into the .bss
+ // of the oat file.
+ kBootImageInternTable,
+
// Load from an entry in the .bss section using a PC-relative load.
// Used for strings outside boot image when .bss is accessible with a PC-relative load.
kBssEntry,
@@ -5928,6 +5935,7 @@ class HLoadString FINAL : public HInstruction {
LoadKind load_kind = GetLoadKind();
if (load_kind == LoadKind::kBootImageLinkTimePcRelative ||
load_kind == LoadKind::kBootImageAddress ||
+ load_kind == LoadKind::kBootImageInternTable ||
load_kind == LoadKind::kJitTableAddress) {
return false;
}
@@ -5988,8 +5996,9 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
// The special input is used for PC-relative loads on some architectures,
// including literal pool loads, which are PC-relative too.
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
- GetLoadKind() == LoadKind::kBssEntry ||
- GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
+ GetLoadKind() == LoadKind::kBootImageAddress ||
+ GetLoadKind() == LoadKind::kBootImageInternTable ||
+ GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind();
// HLoadString::GetInputRecords() returns an empty array at this point,
// so use the GetInputRecords() from the base class to set the input record.
DCHECK(special_input_.GetInstruction() == nullptr);
@@ -7039,6 +7048,17 @@ inline bool IsInt64AndGet(HInstruction* instruction, /*out*/ int64_t* value) {
return false;
}
+// Returns true iff instruction is the given integral constant.
+inline bool IsInt64Value(HInstruction* instruction, int64_t value) {
+ int64_t val = 0;
+ return IsInt64AndGet(instruction, &val) && val == value;
+}
+
+// Returns true iff instruction is a zero bit pattern.
+inline bool IsZeroBitPattern(HInstruction* instruction) {
+ return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern();
+}
+
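These helpers fold the open-coded IsInt64AndGet pattern; the halving-add idiom above, for instance, now tests for a shift distance of one as:

    if ((instruction->IsShr() || instruction->IsUShr()) &&
        IsInt64Value(instruction->InputAt(1), 1)) {
      // recognized x >> 1 or x >>> 1
    }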
#define INSTRUCTION_TYPE_CHECK(type, super) \
inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \
inline const H##type* HInstruction::As##type() const { \
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 6261171a00..886d75e5c7 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -63,6 +63,10 @@ class Alignment {
// GetVectorLength() x GetPackedType() operations simultaneously.
class HVecOperation : public HVariableInputSizeInstruction {
public:
+ // A SIMD operation looks like a FPU location.
+ // TODO: we could introduce SIMD types in HIR.
+ static constexpr Primitive::Type kSIMDType = Primitive::kPrimDouble;
+
HVecOperation(ArenaAllocator* arena,
Primitive::Type packed_type,
SideEffects side_effects,
@@ -89,10 +93,9 @@ class HVecOperation : public HVariableInputSizeInstruction {
return vector_length_ * Primitive::ComponentSize(GetPackedType());
}
- // Returns the type of the vector operation: a SIMD operation looks like a FPU location.
- // TODO: we could introduce SIMD types in HIR.
+ // Returns the type of the vector operation.
Primitive::Type GetType() const OVERRIDE {
- return Primitive::kPrimDouble;
+ return kSIMDType;
}
// Returns the true component type packed in a vector.
@@ -220,8 +223,11 @@ class HVecMemoryOperation : public HVecOperation {
DISALLOW_COPY_AND_ASSIGN(HVecMemoryOperation);
};
-// Packed type consistency checker (same vector length integral types may mix freely).
+// Packed type consistency checker ("same vector length" integral types may mix freely).
inline static bool HasConsistentPackedTypes(HInstruction* input, Primitive::Type type) {
+ if (input->IsPhi()) {
+ return input->GetType() == HVecOperation::kSIMDType; // carries SIMD
+ }
DCHECK(input->IsVecOperation());
Primitive::Type input_type = input->AsVecOperation()->GetPackedType();
switch (input_type) {
@@ -265,27 +271,77 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation {
DISALLOW_COPY_AND_ASSIGN(HVecReplicateScalar);
};
-// Sum-reduces the given vector into a shorter vector (m < n) or scalar (m = 1),
-// viz. sum-reduce[ x1, .. , xn ] = [ y1, .., ym ], where yi = sum_j x_j.
-class HVecSumReduce FINAL : public HVecUnaryOperation {
- HVecSumReduce(ArenaAllocator* arena,
- HInstruction* input,
- Primitive::Type packed_type,
- size_t vector_length,
- uint32_t dex_pc = kNoDexPc)
+// Extracts a particular scalar from the given vector,
+// viz. extract[ x1, .. , xn ] = x_i.
+//
+// TODO: for now, only the i == 1 case (index 0) is supported.
+class HVecExtractScalar FINAL : public HVecUnaryOperation {
+ public:
+ HVecExtractScalar(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ size_t index,
+ uint32_t dex_pc = kNoDexPc)
: HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) {
DCHECK(HasConsistentPackedTypes(input, packed_type));
+ DCHECK_LT(index, vector_length);
+ DCHECK_EQ(index, 0u);
+ }
+
+ // Yields a single component in the vector.
+ Primitive::Type GetType() const OVERRIDE {
+ return GetPackedType();
+ }
+
+ // An extract needs to stay in place, since SIMD registers are not
+ // kept alive across vector loop boundaries (yet).
+ bool CanBeMoved() const OVERRIDE { return false; }
+
+ DECLARE_INSTRUCTION(VecExtractScalar);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecExtractScalar);
+};
+
+// Reduces the given vector into the first element as sum/min/max,
+// viz. reduce[ x1, .. , xn ] = [ y, ---- ], where y = sum/min/max of all xi
+// and the "-" denotes "don't care" (implementation dependent).
+class HVecReduce FINAL : public HVecUnaryOperation {
+ public:
+ enum ReductionKind {
+ kSum = 1,
+ kMin = 2,
+ kMax = 3
+ };
+
+ HVecReduce(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ ReductionKind kind,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc),
+ kind_(kind) {
+ DCHECK(HasConsistentPackedTypes(input, packed_type));
}
- // TODO: probably integral promotion
- Primitive::Type GetType() const OVERRIDE { return GetPackedType(); }
+ ReductionKind GetKind() const { return kind_; }
bool CanBeMoved() const OVERRIDE { return true; }
- DECLARE_INSTRUCTION(VecSumReduce);
+ bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ DCHECK(other->IsVecReduce());
+ const HVecReduce* o = other->AsVecReduce();
+ return HVecOperation::InstructionDataEquals(o) && GetKind() == o->GetKind();
+ }
+
+ DECLARE_INSTRUCTION(VecReduce);
private:
- DISALLOW_COPY_AND_ASSIGN(HVecSumReduce);
+ const ReductionKind kind_;
+
+ DISALLOW_COPY_AND_ASSIGN(HVecReduce);
};
// Converts every component in the vector,
@@ -754,20 +810,23 @@ class HVecUShr FINAL : public HVecBinaryOperation {
//
// Assigns the given scalar elements to a vector,
-// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ].
+// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ] if n == m,
+// set( array(x1, .., xm) ) = [ x1, .. , xm, 0, .., 0 ] if m < n.
class HVecSetScalars FINAL : public HVecOperation {
+ public:
HVecSetScalars(ArenaAllocator* arena,
HInstruction** scalars, // array
Primitive::Type packed_type,
size_t vector_length,
+ size_t number_of_scalars,
uint32_t dex_pc = kNoDexPc)
: HVecOperation(arena,
packed_type,
SideEffects::None(),
- /* number_of_inputs */ vector_length,
+ number_of_scalars,
vector_length,
dex_pc) {
- for (size_t i = 0; i < vector_length; i++) {
+ for (size_t i = 0; i < number_of_scalars; i++) {
DCHECK(!scalars[i]->IsVecOperation());
SetRawInputAt(i, scalars[i]);
}
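For example, with vector length n = 4 (sketch; presumably how the reduction code above seeds its vector accumulator):

    // set( array(x1) )             = [ x1, 0, 0, 0 ]    // m = 1 < n, zero-padded
    // set( array(x1, x2, x3, x4) ) = [ x1, x2, x3, x4 ] // m == n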
diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc
index 0238ea4602..5a56a2c210 100644
--- a/compiler/optimizing/nodes_vector_test.cc
+++ b/compiler/optimizing/nodes_vector_test.cc
@@ -332,4 +332,32 @@ TEST_F(NodesVectorTest, VectorOperationMattersOnMultiplyAccumulate) {
EXPECT_FALSE(v1->Equals(v3)); // different vector lengths
}
+TEST_F(NodesVectorTest, VectorKindMattersOnReduce) {
+ HVecOperation* v0 = new (&allocator_)
+ HVecReplicateScalar(&allocator_, parameter_, Primitive::kPrimInt, 4);
+
+ HVecReduce* v1 = new (&allocator_) HVecReduce(
+ &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kSum);
+ HVecReduce* v2 = new (&allocator_) HVecReduce(
+ &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kMin);
+ HVecReduce* v3 = new (&allocator_) HVecReduce(
+ &allocator_, v0, Primitive::kPrimInt, 4, HVecReduce::kMax);
+
+ EXPECT_FALSE(v0->CanBeMoved());
+ EXPECT_TRUE(v1->CanBeMoved());
+ EXPECT_TRUE(v2->CanBeMoved());
+ EXPECT_TRUE(v3->CanBeMoved());
+
+ EXPECT_EQ(HVecReduce::kSum, v1->GetKind());
+ EXPECT_EQ(HVecReduce::kMin, v2->GetKind());
+ EXPECT_EQ(HVecReduce::kMax, v3->GetKind());
+
+ EXPECT_TRUE(v1->Equals(v1));
+ EXPECT_TRUE(v2->Equals(v2));
+ EXPECT_TRUE(v3->Equals(v3));
+
+ EXPECT_FALSE(v1->Equals(v2)); // different kinds
+ EXPECT_FALSE(v1->Equals(v3));
+}
+
} // namespace art
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 77a63acd18..fde55cb92f 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -148,27 +148,27 @@ static constexpr uint8_t expected_cfi_kMips[] = {
0x48, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B,
0x0E, 0x40,
};
-// 0x00000000: addiu r29, r29, -64
+// 0x00000000: addiu sp, sp, -64
// 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: sw r31, +60(r29)
+// 0x00000004: sw ra, +60(sp)
// 0x00000008: .cfi_offset: r31 at cfa-4
-// 0x00000008: sw r17, +56(r29)
+// 0x00000008: sw s1, +56(sp)
// 0x0000000c: .cfi_offset: r17 at cfa-8
-// 0x0000000c: sw r16, +52(r29)
+// 0x0000000c: sw s0, +52(sp)
// 0x00000010: .cfi_offset: r16 at cfa-12
-// 0x00000010: sdc1 f22, +40(r29)
-// 0x00000014: sdc1 f20, +32(r29)
+// 0x00000010: sdc1 f22, +40(sp)
+// 0x00000014: sdc1 f20, +32(sp)
// 0x00000018: .cfi_remember_state
-// 0x00000018: lw r31, +60(r29)
+// 0x00000018: lw ra, +60(sp)
// 0x0000001c: .cfi_restore: r31
-// 0x0000001c: lw r17, +56(r29)
+// 0x0000001c: lw s1, +56(sp)
// 0x00000020: .cfi_restore: r17
-// 0x00000020: lw r16, +52(r29)
+// 0x00000020: lw s0, +52(sp)
// 0x00000024: .cfi_restore: r16
-// 0x00000024: ldc1 f22, +40(r29)
-// 0x00000028: ldc1 f20, +32(r29)
-// 0x0000002c: jr r31
-// 0x00000030: addiu r29, r29, 64
+// 0x00000024: ldc1 f22, +40(sp)
+// 0x00000028: ldc1 f20, +32(sp)
+// 0x0000002c: jr ra
+// 0x00000030: addiu sp, sp, 64
// 0x00000034: .cfi_def_cfa_offset: 0
// 0x00000034: .cfi_restore_state
// 0x00000034: .cfi_def_cfa_offset: 64
@@ -185,32 +185,32 @@ static constexpr uint8_t expected_cfi_kMips64[] = {
0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44,
0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
};
-// 0x00000000: daddiu r29, r29, -64
+// 0x00000000: daddiu sp, sp, -64
// 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: sd r31, +56(r29)
+// 0x00000004: sd ra, +56(sp)
// 0x00000008: .cfi_offset: r31 at cfa-8
-// 0x00000008: sd r17, +48(r29)
+// 0x00000008: sd s1, +48(sp)
// 0x0000000c: .cfi_offset: r17 at cfa-16
-// 0x0000000c: sd r16, +40(r29)
+// 0x0000000c: sd s0, +40(sp)
// 0x00000010: .cfi_offset: r16 at cfa-24
-// 0x00000010: sdc1 f25, +32(r29)
+// 0x00000010: sdc1 f25, +32(sp)
// 0x00000014: .cfi_offset: r57 at cfa-32
-// 0x00000014: sdc1 f24, +24(r29)
+// 0x00000014: sdc1 f24, +24(sp)
// 0x00000018: .cfi_offset: r56 at cfa-40
// 0x00000018: .cfi_remember_state
-// 0x00000018: ld r31, +56(r29)
+// 0x00000018: ld ra, +56(sp)
// 0x0000001c: .cfi_restore: r31
-// 0x0000001c: ld r17, +48(r29)
+// 0x0000001c: ld s1, +48(sp)
// 0x00000020: .cfi_restore: r17
-// 0x00000020: ld r16, +40(r29)
+// 0x00000020: ld s0, +40(sp)
// 0x00000024: .cfi_restore: r16
-// 0x00000024: ldc1 f25, +32(r29)
+// 0x00000024: ldc1 f25, +32(sp)
// 0x00000028: .cfi_restore: r57
-// 0x00000028: ldc1 f24, +24(r29)
+// 0x00000028: ldc1 f24, +24(sp)
// 0x0000002c: .cfi_restore: r56
-// 0x0000002c: daddiu r29, r29, 64
+// 0x0000002c: daddiu sp, sp, 64
// 0x00000030: .cfi_def_cfa_offset: 0
-// 0x00000030: jic r31, 0
+// 0x00000030: jic ra, 0
// 0x00000034: .cfi_restore_state
// 0x00000034: .cfi_def_cfa_offset: 64
@@ -330,7 +330,7 @@ static constexpr uint8_t expected_cfi_kThumb2_adjust[] = {
static constexpr uint8_t expected_asm_kMips_adjust_head[] = {
0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF,
0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
- 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
+ 0x08, 0x00, 0x80, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C,
0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F,
0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27,
@@ -345,42 +345,42 @@ static constexpr uint8_t expected_cfi_kMips_adjust[] = {
0x50, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A,
0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40,
};
-// 0x00000000: addiu r29, r29, -64
+// 0x00000000: addiu sp, sp, -64
// 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: sw r31, +60(r29)
+// 0x00000004: sw ra, +60(sp)
// 0x00000008: .cfi_offset: r31 at cfa-4
-// 0x00000008: sw r17, +56(r29)
+// 0x00000008: sw s1, +56(sp)
// 0x0000000c: .cfi_offset: r17 at cfa-8
-// 0x0000000c: sw r16, +52(r29)
+// 0x0000000c: sw s0, +52(sp)
// 0x00000010: .cfi_offset: r16 at cfa-12
-// 0x00000010: sdc1 f22, +40(r29)
-// 0x00000014: sdc1 f20, +32(r29)
-// 0x00000018: bne r0, r4, 0x00000040 ; +36
-// 0x0000001c: addiu r29, r29, -4
+// 0x00000010: sdc1 f22, +40(sp)
+// 0x00000014: sdc1 f20, +32(sp)
+// 0x00000018: bnez a0, 0x0000003c ; +36
+// 0x0000001c: addiu sp, sp, -4
// 0x00000020: .cfi_def_cfa_offset: 68
-// 0x00000020: sw r31, +0(r29)
-// 0x00000024: bltzal r0, 0x0000002c ; +4
-// 0x00000028: lui r1, 0x20000
-// 0x0000002c: ori r1, r1, 24
-// 0x00000030: addu r1, r1, r31
-// 0x00000034: lw r31, +0(r29)
-// 0x00000038: jr r1
-// 0x0000003c: addiu r29, r29, 4
+// 0x00000020: sw ra, +0(sp)
+// 0x00000024: nal
+// 0x00000028: lui at, 2
+// 0x0000002c: ori at, at, 24
+// 0x00000030: addu at, at, ra
+// 0x00000034: lw ra, +0(sp)
+// 0x00000038: jr at
+// 0x0000003c: addiu sp, sp, 4
// 0x00000040: .cfi_def_cfa_offset: 64
// 0x00000040: nop
// ...
// 0x00020040: nop
// 0x00020044: .cfi_remember_state
-// 0x00020044: lw r31, +60(r29)
+// 0x00020044: lw ra, +60(sp)
// 0x00020048: .cfi_restore: r31
-// 0x00020048: lw r17, +56(r29)
+// 0x00020048: lw s1, +56(sp)
// 0x0002004c: .cfi_restore: r17
-// 0x0002004c: lw r16, +52(r29)
+// 0x0002004c: lw s0, +52(sp)
// 0x00020050: .cfi_restore: r16
-// 0x00020050: ldc1 f22, +40(r29)
-// 0x00020054: ldc1 f20, +32(r29)
-// 0x00020058: jr r31
-// 0x0002005c: addiu r29, r29, 64
+// 0x00020050: ldc1 f22, +40(sp)
+// 0x00020054: ldc1 f20, +32(sp)
+// 0x00020058: jr ra
+// 0x0002005c: addiu sp, sp, 64
// 0x00020060: .cfi_def_cfa_offset: 0
// 0x00020060: .cfi_restore_state
// 0x00020060: .cfi_def_cfa_offset: 64
@@ -401,37 +401,37 @@ static constexpr uint8_t expected_cfi_kMips64_adjust[] = {
0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E,
0x00, 0x44, 0x0B, 0x0E, 0x40,
};
-// 0x00000000: daddiu r29, r29, -64
+// 0x00000000: daddiu sp, sp, -64
// 0x00000004: .cfi_def_cfa_offset: 64
-// 0x00000004: sd r31, +56(r29)
+// 0x00000004: sd ra, +56(sp)
// 0x00000008: .cfi_offset: r31 at cfa-8
-// 0x00000008: sd r17, +48(r29)
+// 0x00000008: sd s1, +48(sp)
// 0x0000000c: .cfi_offset: r17 at cfa-16
-// 0x0000000c: sd r16, +40(r29)
+// 0x0000000c: sd s0, +40(sp)
// 0x00000010: .cfi_offset: r16 at cfa-24
-// 0x00000010: sdc1 f25, +32(r29)
+// 0x00000010: sdc1 f25, +32(sp)
// 0x00000014: .cfi_offset: r57 at cfa-32
-// 0x00000014: sdc1 f24, +24(r29)
+// 0x00000014: sdc1 f24, +24(sp)
// 0x00000018: .cfi_offset: r56 at cfa-40
-// 0x00000018: bnec r5, r6, 0x00000024 ; +12
-// 0x0000001c: auipc r1, 2
-// 0x00000020: jic r1, 12 ; bc 0x00020028 ; +131080
+// 0x00000018: bnec a1, a2, 0x00000024 ; +12
+// 0x0000001c: auipc at, 2
+// 0x00000020: jic at, 12 ; bc 0x00020028 ; +131080
// 0x00000024: nop
// ...
// 0x00020024: nop
// 0x00020028: .cfi_remember_state
-// 0x00020028: ld r31, +56(r29)
+// 0x00020028: ld ra, +56(sp)
// 0x0002002c: .cfi_restore: r31
-// 0x0002002c: ld r17, +48(r29)
+// 0x0002002c: ld s1, +48(sp)
// 0x00020030: .cfi_restore: r17
-// 0x00020030: ld r16, +40(r29)
+// 0x00020030: ld s0, +40(sp)
// 0x00020034: .cfi_restore: r16
-// 0x00020034: ldc1 f25, +32(r29)
+// 0x00020034: ldc1 f25, +32(sp)
// 0x00020038: .cfi_restore: r57
-// 0x00020038: ldc1 f24, +24(r29)
+// 0x00020038: ldc1 f24, +24(sp)
// 0x0002003c: .cfi_restore: r56
-// 0x0002003c: daddiu r29, r29, 64
+// 0x0002003c: daddiu sp, sp, 64
// 0x00020040: .cfi_def_cfa_offset: 0
-// 0x00020040: jic r31, 0
+// 0x00020040: jic ra, 0
// 0x00020044: .cfi_restore_state
// 0x00020044: .cfi_def_cfa_offset: 64
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index b45f3c6b33..399cd98983 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -22,8 +22,6 @@
#include <stdint.h>
-#include "android-base/strings.h"
-
#ifdef ART_ENABLE_CODEGEN_arm64
#include "instruction_simplifier_arm64.h"
#endif
@@ -492,7 +490,7 @@ static HOptimization* BuildOptimization(
} else if (opt_name == HSharpening::kSharpeningPassName) {
return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver, handles);
} else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) {
- return new (arena) HSelectGenerator(graph, stats);
+ return new (arena) HSelectGenerator(graph, handles, stats);
} else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
return new (arena) HInductionVarAnalysis(graph);
} else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
@@ -512,7 +510,7 @@ static HOptimization* BuildOptimization(
} else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
return new (arena) SideEffectsAnalysis(graph);
} else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
- return new (arena) HLoopOptimization(graph, driver, most_recent_induction);
+ return new (arena) HLoopOptimization(graph, driver, most_recent_induction, stats);
} else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) {
return new (arena) CHAGuardOptimization(graph);
} else if (opt_name == CodeSinking::kCodeSinkingPassName) {
@@ -763,7 +761,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding");
InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(
graph, codegen, driver, stats);
- HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
+ HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, handles, stats);
HConstantFolding* fold2 = new (arena) HConstantFolding(
graph, "constant_folding$after_inlining");
HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce");
@@ -775,7 +773,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
LICM* licm = new (arena) LICM(graph, *side_effects1, stats);
HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction);
- HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction);
+ HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction, stats);
LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph);
LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa, stats);
HSharpening* sharpening = new (arena) HSharpening(
@@ -1134,12 +1132,7 @@ Compiler* CreateOptimizingCompiler(CompilerDriver* driver) {
bool IsCompilingWithCoreImage() {
const std::string& image = Runtime::Current()->GetImageLocation();
- // TODO: This is under-approximating...
- if (android::base::EndsWith(image, "core.art") ||
- android::base::EndsWith(image, "core-optimizing.art")) {
- return true;
- }
- return false;
+ return CompilerDriver::IsCoreImageFilename(image);
}
bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) {
@@ -1233,14 +1226,14 @@ bool OptimizingCompiler::JitCompile(Thread* self,
uint8_t* stack_map_data = nullptr;
uint8_t* method_info_data = nullptr;
uint8_t* roots_data = nullptr;
- code_cache->ReserveData(self,
- stack_map_size,
- method_info_size,
- number_of_roots,
- method,
- &stack_map_data,
- &method_info_data,
- &roots_data);
+ uint32_t data_size = code_cache->ReserveData(self,
+ stack_map_size,
+ method_info_size,
+ number_of_roots,
+ method,
+ &stack_map_data,
+ &method_info_data,
+ &roots_data);
if (stack_map_data == nullptr || roots_data == nullptr) {
return false;
}
@@ -1261,6 +1254,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
codegen->GetFpuSpillMask(),
code_allocator.GetMemory().data(),
code_allocator.GetSize(),
+ data_size,
osr,
roots,
codegen->GetGraph()->HasShouldDeoptimizeFlag(),
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index af7ab2f1a0..07f9635aba 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -63,6 +63,8 @@ enum MethodCompilationStat {
kBooleanSimplified,
kIntrinsicRecognized,
kLoopInvariantMoved,
+ kLoopVectorized,
+ kLoopVectorizedIdiom,
kSelectGenerated,
kRemovedInstanceOf,
kInlinedInvokeVirtualOrInterface,
@@ -184,6 +186,8 @@ class OptimizingCompilerStats {
case kBooleanSimplified : name = "BooleanSimplified"; break;
case kIntrinsicRecognized : name = "IntrinsicRecognized"; break;
case kLoopInvariantMoved : name = "LoopInvariantMoved"; break;
+ case kLoopVectorized : name = "LoopVectorized"; break;
+ case kLoopVectorizedIdiom : name = "LoopVectorizedIdiom"; break;
case kSelectGenerated : name = "SelectGenerated"; break;
case kRemovedInstanceOf: name = "RemovedInstanceOf"; break;
case kInlinedInvokeVirtualOrInterface: name = "InlinedInvokeVirtualOrInterface"; break;
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index 21b645279e..4cb99f9b5c 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -88,8 +88,9 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
void VisitLoadString(HLoadString* load_string) OVERRIDE {
HLoadString::LoadKind load_kind = load_string->GetLoadKind();
switch (load_kind) {
- case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+ case HLoadString::LoadKind::kBootImageAddress:
+ case HLoadString::LoadKind::kBootImageInternTable:
case HLoadString::LoadKind::kBssEntry:
// Add a base register for PC-relative literals on R2.
InitializePCRelativeBasePointer();
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 2743df9dcf..c463ecdb0a 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -92,6 +92,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
void VisitLoadString(HLoadString* load_string) OVERRIDE {
HLoadString::LoadKind load_kind = load_string->GetLoadKind();
if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
+ load_kind == HLoadString::LoadKind::kBootImageInternTable ||
load_kind == HLoadString::LoadKind::kBssEntry) {
HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_string);
load_string->AddSpecialInput(method_address);
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 561c9eafa2..93613a5542 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -754,8 +754,23 @@ void ReferenceTypePropagation::VisitPhi(HPhi* phi) {
}
}
+void ReferenceTypePropagation::FixUpInstructionType(HInstruction* instruction,
+ VariableSizedHandleScope* handle_scope) {
+ if (instruction->IsSelect()) {
+ ScopedObjectAccess soa(Thread::Current());
+ HandleCache handle_cache(handle_scope);
+ HSelect* select = instruction->AsSelect();
+ ReferenceTypeInfo false_rti = select->GetFalseValue()->GetReferenceTypeInfo();
+ ReferenceTypeInfo true_rti = select->GetTrueValue()->GetReferenceTypeInfo();
+ select->SetReferenceTypeInfo(MergeTypes(false_rti, true_rti, &handle_cache));
+ } else {
+ LOG(FATAL) << "Invalid instruction in FixUpInstructionType";
+ }
+}
+
ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& a,
- const ReferenceTypeInfo& b) {
+ const ReferenceTypeInfo& b,
+ HandleCache* handle_cache) {
if (!b.IsValid()) {
return a;
}
@@ -780,7 +795,7 @@ ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo&
is_exact = false;
} else if (!a_is_interface && !b_is_interface) {
result_type_handle =
- handle_cache_.NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle));
+ handle_cache->NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle));
is_exact = false;
} else {
// This can happen if:
@@ -790,7 +805,7 @@ ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo&
// void foo(Interface i, boolean cond) {
// Object o = cond ? i : new Object();
// }
- result_type_handle = handle_cache_.GetObjectClassHandle();
+ result_type_handle = handle_cache->GetObjectClassHandle();
is_exact = false;
}
@@ -916,7 +931,7 @@ void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
if (inputs[i]->IsNullConstant()) {
continue;
}
- new_rti = MergeTypes(new_rti, inputs[i]->GetReferenceTypeInfo());
+ new_rti = MergeTypes(new_rti, inputs[i]->GetReferenceTypeInfo(), &handle_cache_);
if (new_rti.IsValid() && new_rti.IsObjectClass()) {
if (!new_rti.IsExact()) {
break;
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index b19f473e27..c221282b9b 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -54,6 +54,12 @@ class ReferenceTypePropagation : public HOptimization {
static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation";
+ // Fix the reference type for an instruction whose inputs have changed.
+ // For a select instruction, the reference types of the inputs are merged
+ // and the resulting reference type is set on the select instruction.
+ static void FixUpInstructionType(HInstruction* instruction,
+ VariableSizedHandleScope* handle_scope);
+
private:
class HandleCache {
public:
@@ -101,7 +107,9 @@ class ReferenceTypePropagation : public HOptimization {
static void UpdateArrayGet(HArrayGet* instr, HandleCache* handle_cache)
REQUIRES_SHARED(Locks::mutator_lock_);
- ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b)
+ static ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a,
+ const ReferenceTypeInfo& b,
+ HandleCache* handle_cache)
REQUIRES_SHARED(Locks::mutator_lock_);
void ValidateTypes();
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index d537459113..cb2af91d87 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -49,7 +49,7 @@ class ReferenceTypePropagationTest : public CommonCompilerTest {
// Relay method to merge type in reference type propagation.
ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a,
const ReferenceTypeInfo& b) REQUIRES_SHARED(Locks::mutator_lock_) {
- return propagation_->MergeTypes(a, b);
+ return propagation_->MergeTypes(a, b, &propagation_->handle_cache_);
}
// Helper method to construct an invalid type.
@@ -163,4 +163,3 @@ TEST_F(ReferenceTypePropagationTest, MergeValidTypes) {
}
} // namespace art
-
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index 5ad011d8f9..38cd51bef6 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -554,6 +554,14 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingN
}
void HScheduler::Schedule(HGraph* graph) {
+ // We run LSA here instead of in a separate pass to better control whether we
+ // should run the analysis or not.
+ LoadStoreAnalysis lsa(graph);
+ if (!only_optimize_loop_blocks_ || graph->HasLoops()) {
+ lsa.Run();
+ scheduling_graph_.SetHeapLocationCollector(lsa.GetHeapLocationCollector());
+ }
+
for (HBasicBlock* block : graph->GetReversePostOrder()) {
if (IsSchedulable(block)) {
Schedule(block);
@@ -566,14 +574,6 @@ void HScheduler::Schedule(HBasicBlock* block) {
// Build the scheduling graph.
scheduling_graph_.Clear();
-
- // Only perform LSA/HeapLocation analysis on the basic block that
- // is going to get instruction scheduled.
- HeapLocationCollector heap_location_collector(block->GetGraph());
- heap_location_collector.VisitBasicBlock(block);
- heap_location_collector.BuildAliasingMatrix();
- scheduling_graph_.SetHeapLocationCollector(heap_location_collector);
-
for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* instruction = it.Current();
CHECK_EQ(instruction->GetBlock(), block)
@@ -724,8 +724,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
instruction->IsClassTableGet() ||
instruction->IsCurrentMethod() ||
instruction->IsDivZeroCheck() ||
- instruction->IsInstanceFieldGet() ||
- instruction->IsInstanceFieldSet() ||
+ (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) ||
+ (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) ||
instruction->IsInstanceOf() ||
instruction->IsInvokeInterface() ||
instruction->IsInvokeStaticOrDirect() ||
@@ -741,14 +741,10 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
instruction->IsReturn() ||
instruction->IsReturnVoid() ||
instruction->IsSelect() ||
- instruction->IsStaticFieldGet() ||
- instruction->IsStaticFieldSet() ||
+ (instruction->IsStaticFieldGet() && !instruction->AsStaticFieldGet()->IsVolatile()) ||
+ (instruction->IsStaticFieldSet() && !instruction->AsStaticFieldSet()->IsVolatile()) ||
instruction->IsSuspendCheck() ||
- instruction->IsTypeConversion() ||
- instruction->IsUnresolvedInstanceFieldGet() ||
- instruction->IsUnresolvedInstanceFieldSet() ||
- instruction->IsUnresolvedStaticFieldGet() ||
- instruction->IsUnresolvedStaticFieldSet();
+ instruction->IsTypeConversion();
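The new volatile carve-outs matter because a volatile access carries ordering semantics the scheduler must not disturb; sketch:

    // x = obj.volatileField;  // IsInstanceFieldGet() && IsVolatile(): not schedulable,
    //                         // must keep its program-order position
    // y = obj.plainField;     // non-volatile get: still a candidate for reordering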
}
bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index ea15790105..d6eb6e3c52 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -20,6 +20,7 @@
#include "code_generator_utils.h"
#include "common_arm.h"
#include "mirror/array-inl.h"
+#include "mirror/string.h"
namespace art {
namespace arm {
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index f54d3f3de2..1d9d28ab24 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -18,6 +18,7 @@
#include "code_generator_utils.h"
#include "mirror/array-inl.h"
+#include "mirror/string.h"
namespace art {
namespace arm64 {
@@ -214,12 +215,12 @@ void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar(
last_visited_latency_ = kArm64SIMDReplicateOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
- LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+void SchedulingLatencyVisitorARM64::VisitVecExtractScalar(HVecExtractScalar* instr) {
+ HandleSimpleArithmeticSIMD(instr);
}
-void SchedulingLatencyVisitorARM64::VisitVecSumReduce(HVecSumReduce* instr) {
- LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+void SchedulingLatencyVisitorARM64::VisitVecReduce(HVecReduce* instr) {
+ HandleSimpleArithmeticSIMD(instr);
}
void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) {
@@ -282,8 +283,8 @@ void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED)
last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
-void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr) {
- LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64SIMDIntegerOpLatency;
}
void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) {
@@ -306,6 +307,10 @@ void SchedulingLatencyVisitorARM64::VisitVecUShr(HVecUShr* instr) {
HandleSimpleArithmeticSIMD(instr);
}
+void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
+ HandleSimpleArithmeticSIMD(instr);
+}
+
void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate(
HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) {
last_visited_latency_ = kArm64SIMDMulIntegerLatency;
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index 63d5b7d6b6..e1a80ec6fb 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -83,8 +83,8 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
M(SuspendCheck , unused) \
M(TypeConversion , unused) \
M(VecReplicateScalar , unused) \
- M(VecSetScalars , unused) \
- M(VecSumReduce , unused) \
+ M(VecExtractScalar , unused) \
+ M(VecReduce , unused) \
M(VecCnv , unused) \
M(VecNeg , unused) \
M(VecAbs , unused) \
@@ -103,6 +103,7 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
M(VecShl , unused) \
M(VecShr , unused) \
M(VecUShr , unused) \
+ M(VecSetScalars , unused) \
M(VecMultiplyAccumulate, unused) \
M(VecLoad , unused) \
M(VecStore , unused)
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index cb7ade915f..e220d32344 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -20,9 +20,16 @@ namespace art {
static constexpr size_t kMaxInstructionsInBranch = 1u;
-// Returns true if `block` has only one predecessor, ends with a Goto and
-// contains at most `kMaxInstructionsInBranch` other movable instruction with
-// no side-effects.
+HSelectGenerator::HSelectGenerator(HGraph* graph,
+ VariableSizedHandleScope* handles,
+ OptimizingCompilerStats* stats)
+ : HOptimization(graph, kSelectGeneratorPassName, stats),
+ handle_scope_(handles) {
+}
+
+// Returns true if `block` has only one predecessor, ends with a Goto
+// or a Return, and contains at most `kMaxInstructionsInBranch` other
+// movable instructions with no side effects.
static bool IsSimpleBlock(HBasicBlock* block) {
if (block->GetPredecessors().size() != 1u) {
return false;
@@ -33,7 +40,10 @@ static bool IsSimpleBlock(HBasicBlock* block) {
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* instruction = it.Current();
if (instruction->IsControlFlow()) {
- return instruction->IsGoto() && num_instructions <= kMaxInstructionsInBranch;
+ if (num_instructions > kMaxInstructionsInBranch) {
+ return false;
+ }
+ return instruction->IsGoto() || instruction->IsReturn();
} else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) {
num_instructions++;
} else {
@@ -45,8 +55,8 @@ static bool IsSimpleBlock(HBasicBlock* block) {
UNREACHABLE();
}
-// Returns true if 'block1' and 'block2' are empty, merge into the same single
-// successor and the successor can only be reached from them.
+// Returns true if 'block1' and 'block2' are empty and merge into the
+// same single successor.
static bool BlocksMergeTogether(HBasicBlock* block1, HBasicBlock* block2) {
return block1->GetSingleSuccessor() == block2->GetSingleSuccessor();
}
@@ -94,48 +104,68 @@ void HSelectGenerator::Run() {
// If the branches are not empty, move instructions in front of the If.
// TODO(dbrazdil): This puts an instruction between If and its condition.
// Implement moving of conditions to first users if possible.
- if (!true_block->IsSingleGoto()) {
+ if (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) {
true_block->GetFirstInstruction()->MoveBefore(if_instruction);
}
- if (!false_block->IsSingleGoto()) {
+ if (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) {
false_block->GetFirstInstruction()->MoveBefore(if_instruction);
}
- DCHECK(true_block->IsSingleGoto());
- DCHECK(false_block->IsSingleGoto());
+ DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn());
+ DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn());
// Find the resulting true/false values.
size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block);
size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block);
DCHECK_NE(predecessor_index_true, predecessor_index_false);
+ bool both_successors_return = true_block->IsSingleReturn() && false_block->IsSingleReturn();
HPhi* phi = GetSingleChangedPhi(merge_block, predecessor_index_true, predecessor_index_false);
- if (phi == nullptr) {
+
+ HInstruction* true_value = nullptr;
+ HInstruction* false_value = nullptr;
+ if (both_successors_return) {
+ true_value = true_block->GetFirstInstruction()->InputAt(0);
+ false_value = false_block->GetFirstInstruction()->InputAt(0);
+ } else if (phi != nullptr) {
+ true_value = phi->InputAt(predecessor_index_true);
+ false_value = phi->InputAt(predecessor_index_false);
+ } else {
continue;
}
- HInstruction* true_value = phi->InputAt(predecessor_index_true);
- HInstruction* false_value = phi->InputAt(predecessor_index_false);
+ DCHECK(both_successors_return || phi != nullptr);
// Create the Select instruction and insert it in front of the If.
HSelect* select = new (graph_->GetArena()) HSelect(if_instruction->InputAt(0),
true_value,
false_value,
if_instruction->GetDexPc());
- if (phi->GetType() == Primitive::kPrimNot) {
+ if (both_successors_return) {
+ if (true_value->GetType() == Primitive::kPrimNot) {
+ DCHECK(false_value->GetType() == Primitive::kPrimNot);
+ ReferenceTypePropagation::FixUpInstructionType(select, handle_scope_);
+ }
+ } else if (phi->GetType() == Primitive::kPrimNot) {
select->SetReferenceTypeInfo(phi->GetReferenceTypeInfo());
}
block->InsertInstructionBefore(select, if_instruction);
- // Remove the true branch which removes the corresponding Phi input.
- // If left only with the false branch, the Phi is automatically removed.
- phi->ReplaceInput(select, predecessor_index_false);
+ // Remove the true branch, which removes the corresponding Phi
+ // input if needed. If left only with the false branch, the Phi is
+ // automatically removed.
+ if (both_successors_return) {
+ false_block->GetFirstInstruction()->ReplaceInput(select, 0);
+ } else {
+ phi->ReplaceInput(select, predecessor_index_false);
+ }
+
bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u);
true_block->DisconnectAndDelete();
- DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr);
// Merge remaining blocks which are now connected with Goto.
DCHECK_EQ(block->GetSingleSuccessor(), false_block);
block->MergeWith(false_block);
- if (only_two_predecessors) {
+ if (!both_successors_return && only_two_predecessors) {
+ DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr);
DCHECK_EQ(block->GetSingleSuccessor(), merge_block);
block->MergeWith(merge_block);
}
diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h
index c6dca581cc..c060146478 100644
--- a/compiler/optimizing/select_generator.h
+++ b/compiler/optimizing/select_generator.h
@@ -18,7 +18,7 @@
* This optimization recognizes the common diamond selection pattern and
* replaces it with an instance of the HSelect instruction.
*
- * Recognized pattern:
+ * Recognized patterns:
*
* If [ Condition ]
* / \
@@ -26,14 +26,30 @@
* \ /
* Phi [FalseValue, TrueValue]
*
+ * and
+ *
+ * If [ Condition ]
+ * / \
+ * false branch true branch
+ * return FalseValue return TrueValue
+ *
* The pattern will be simplified if `true_branch` and `false_branch` each
* contain at most one instruction without any side effects.
*
- * Blocks are merged into one and Select replaces the If and the Phi:
+ * Blocks are merged into one and Select replaces the If and the Phi.
+ *
+ * For the first pattern it simplifies to:
+ *
* true branch
* false branch
* Select [FalseValue, TrueValue, Condition]
*
+ * For the second pattern it simplifies to:
+ *
+ * true branch
+ * false branch
+ * return Select [FalseValue, TrueValue, Condition]
+ *
* Note: In order to recognize no side-effect blocks, this optimization must be
* run after the instruction simplifier has removed redundant suspend checks.
*/
@@ -42,19 +58,22 @@
#define ART_COMPILER_OPTIMIZING_SELECT_GENERATOR_H_
#include "optimization.h"
+#include "reference_type_propagation.h"
namespace art {
class HSelectGenerator : public HOptimization {
public:
- HSelectGenerator(HGraph* graph, OptimizingCompilerStats* stats)
- : HOptimization(graph, kSelectGeneratorPassName, stats) {}
+ HSelectGenerator(HGraph* graph,
+ VariableSizedHandleScope* handles,
+ OptimizingCompilerStats* stats);
void Run() OVERRIDE;
static constexpr const char* kSelectGeneratorPassName = "select_generator";
private:
+ VariableSizedHandleScope* handle_scope_;
DISALLOW_COPY_AND_ASSIGN(HSelectGenerator);
};
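The matching constructor definition is not shown in this diff; a minimal sketch consistent with the declaration above would be:

HSelectGenerator::HSelectGenerator(HGraph* graph,
                                   VariableSizedHandleScope* handles,
                                   OptimizingCompilerStats* stats)
    : HOptimization(graph, kSelectGeneratorPassName, stats),
      handle_scope_(handles) {}  // Keep the handle scope for reference-type fix-ups.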
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 9536d149f6..1ca63f4f86 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -278,10 +278,12 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
} else {
// AOT app compilation. Try to lookup the string without allocating if not found.
string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
- if (string != nullptr &&
- runtime->GetHeap()->ObjectIsInBootImageSpace(string) &&
- !codegen_->GetCompilerOptions().GetCompilePic()) {
- desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+ if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
+ if (codegen_->GetCompilerOptions().GetCompilePic()) {
+ desired_load_kind = HLoadString::LoadKind::kBootImageInternTable;
+ } else {
+ desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+ }
} else {
desired_load_kind = HLoadString::LoadKind::kBssEntry;
}
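Restated as a stand-alone sketch (function name and signature are illustrative, not from the patch), the new decision logic for AOT app compilation is:

HLoadString::LoadKind ChooseAppAotStringLoadKind(bool in_boot_image_space, bool compile_pic) {
  if (in_boot_image_space) {
    // PIC code cannot embed an absolute address, so reference the boot
    // image intern table instead.
    return compile_pic ? HLoadString::LoadKind::kBootImageInternTable
                       : HLoadString::LoadKind::kBootImageAddress;
  }
  return HLoadString::LoadKind::kBssEntry;  // Resolved at run time via .bss.
}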
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index af3b4474e3..9df1b7434a 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -82,6 +82,22 @@ void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) {
}
}
+void ArmVIXLAssembler::GenerateMarkingRegisterCheck(vixl32::Register temp, int code) {
+ // The Marking Register is only used in the Baker read barrier configuration.
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ vixl32::Label mr_is_ok;
+
+ // temp = self.tls32_.is_gc_marking
+ ___ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value()));
+ // Check that mr == self.tls32_.is_gc_marking.
+ ___ Cmp(mr, temp);
+ ___ B(eq, &mr_is_ok, /* far_target */ false);
+ ___ Bkpt(code);
+ ___ Bind(&mr_is_ok);
+}
+
void ArmVIXLAssembler::LoadImmediate(vixl32::Register rd, int32_t value) {
// TODO(VIXL): Implement this optimization in VIXL.
if (!ShifterOperandCanAlwaysHold(value) && ShifterOperandCanAlwaysHold(~value)) {
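The check above expands to a short instruction sequence; roughly (a sketch, assembly syntax approximate):

//   ldr   temp, [tr, #is_gc_marking_offset]  ; load Thread::tls32_.is_gc_marking
//   cmp   mr, temp                           ; MR must mirror the flag
//   beq   mr_is_ok                           ; near branch (far_target = false)
//   bkpt  #code                              ; abort; `code` identifies the call site
// mr_is_ok: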
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index 66b22ea87c..9c11fd3222 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -178,6 +178,7 @@ class ArmVIXLAssembler FINAL : public Assembler {
//
// Heap poisoning.
//
+
// Poison a heap reference contained in `reg`.
void PoisonHeapReference(vixl32::Register reg);
// Unpoison a heap reference contained in `reg`.
@@ -187,6 +188,15 @@ class ArmVIXLAssembler FINAL : public Assembler {
// Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
void MaybeUnpoisonHeapReference(vixl32::Register reg);
+ // Emit code checking the status of the Marking Register, and aborting
+ // the program if MR does not match the value stored in the art::Thread
+ // object.
+ //
+ // Argument `temp` is used as a temporary register to generate code.
+ // Argument `code` is used to identify the different occurrences of
+ // MaybeGenerateMarkingRegisterCheck and is passed to the BKPT instruction.
+ void GenerateMarkingRegisterCheck(vixl32::Register temp, int code = 0);
+
void StoreToOffset(StoreOperandType type,
vixl32::Register reg,
vixl32::Register base,
diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc
index 6ed0e9b670..d8a48a563c 100644
--- a/compiler/utils/arm64/assembler_arm64.cc
+++ b/compiler/utils/arm64/assembler_arm64.cc
@@ -158,6 +158,24 @@ void Arm64Assembler::MaybeUnpoisonHeapReference(Register reg) {
}
}
+void Arm64Assembler::GenerateMarkingRegisterCheck(Register temp, int code) {
+ // The Marking Register is only used in the Baker read barrier configuration.
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ vixl::aarch64::Register mr = reg_x(MR); // Marking Register.
+ vixl::aarch64::Register tr = reg_x(TR); // Thread Register.
+ vixl::aarch64::Label mr_is_ok;
+
+ // temp = self.tls32_.is_gc_marking
+ ___ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value()));
+ // Check that mr == self.tls32_.is_gc_marking.
+ ___ Cmp(mr.W(), temp);
+ ___ B(eq, &mr_is_ok);
+ ___ Brk(code);
+ ___ Bind(&mr_is_ok);
+}
+
#undef ___
} // namespace arm64
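A hypothetical call site (sketch only; the scratch-register handling and the code value are illustrative):

// Verify MR against the thread-local flag before generating a runtime call.
vixl::aarch64::UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
vixl::aarch64::Register temp = temps.AcquireW();
assembler.GenerateMarkingRegisterCheck(temp, /* code */ 17);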
diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h
index 5b8a34e56d..6b28363a8f 100644
--- a/compiler/utils/arm64/assembler_arm64.h
+++ b/compiler/utils/arm64/assembler_arm64.h
@@ -98,6 +98,15 @@ class Arm64Assembler FINAL : public Assembler {
// Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
void MaybeUnpoisonHeapReference(vixl::aarch64::Register reg);
+ // Emit code checking the status of the Marking Register, and aborting
+ // the program if MR does not match the value stored in the art::Thread
+ // object.
+ //
+ // Argument `temp` is used as a temporary register to generate code.
+ // Argument `code` is used to identify the different occurrences of
+ // MaybeGenerateMarkingRegisterCheck and is passed to the BRK instruction.
+ void GenerateMarkingRegisterCheck(vixl::aarch64::Register temp, int code = 0);
+
void Bind(Label* label ATTRIBUTE_UNUSED) OVERRIDE {
UNIMPLEMENTED(FATAL) << "Do not use Bind for ARM64";
}
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index bab84bea4c..9732b765a1 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -662,7 +662,7 @@ void Arm64JNIMacroAssembler::Bind(JNIMacroLabel* label) {
___ Bind(Arm64JNIMacroLabel::Cast(label)->AsArm64());
}
-void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception *exception) {
+void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception* exception) {
UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
Register temp = temps.AcquireX();
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index 59a1a48e20..a8ca1119e5 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -216,8 +216,15 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> {
*/
virtual DebugFrameOpCodeWriterForAssembler& cfi() = 0;
+ void SetEmitRunTimeChecksInDebugMode(bool value) {
+ emit_run_time_checks_in_debug_mode_ = value;
+ }
+
protected:
- explicit JNIMacroAssembler() {}
+ JNIMacroAssembler() {}
+
+ // Should run-time checks be emitted in debug mode?
+ bool emit_run_time_checks_in_debug_mode_ = false;
};
// A "Label" class used with the JNIMacroAssembler
diff --git a/compiler/utils/label.h b/compiler/utils/label.h
index 85710d0811..d835c63443 100644
--- a/compiler/utils/label.h
+++ b/compiler/utils/label.h
@@ -31,9 +31,11 @@ namespace arm64 {
} // namespace arm64
namespace mips {
class MipsAssembler;
+ class MipsLabel;
} // namespace mips
namespace mips64 {
class Mips64Assembler;
+ class Mips64Label;
} // namespace mips64
namespace x86 {
class X86Assembler;
@@ -114,7 +116,9 @@ class Label {
friend class arm64::Arm64Assembler;
friend class mips::MipsAssembler;
+ friend class mips::MipsLabel;
friend class mips64::Mips64Assembler;
+ friend class mips64::Mips64Label;
friend class x86::X86Assembler;
friend class x86::NearLabel;
friend class x86_64::X86_64Assembler;
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 2cbabcfb32..b300cc597f 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -47,7 +47,8 @@ MipsAssembler::DelaySlot::DelaySlot()
fpr_outs_mask_(0),
fpr_ins_mask_(0),
cc_outs_mask_(0),
- cc_ins_mask_(0) {}
+ cc_ins_mask_(0),
+ patcher_label_(nullptr) {}
void MipsAssembler::DsFsmInstr(uint32_t instruction,
uint32_t gpr_outs_mask,
@@ -55,7 +56,8 @@ void MipsAssembler::DsFsmInstr(uint32_t instruction,
uint32_t fpr_outs_mask,
uint32_t fpr_ins_mask,
uint32_t cc_outs_mask,
- uint32_t cc_ins_mask) {
+ uint32_t cc_ins_mask,
+ MipsLabel* patcher_label) {
if (!reordering_) {
CHECK_EQ(ds_fsm_state_, kExpectingLabel);
CHECK_EQ(delay_slot_.instruction_, 0u);
@@ -96,6 +98,7 @@ void MipsAssembler::DsFsmInstr(uint32_t instruction,
delay_slot_.fpr_ins_mask_ = fpr_ins_mask;
delay_slot_.cc_outs_mask_ = cc_outs_mask;
delay_slot_.cc_ins_mask_ = cc_ins_mask;
+ delay_slot_.patcher_label_ = patcher_label;
}
void MipsAssembler::DsFsmLabel() {
@@ -167,8 +170,12 @@ void MipsAssembler::DsFsmInstrNop(uint32_t instruction ATTRIBUTE_UNUSED) {
DsFsmInstr(0, 0, 0, 0, 0, 0, 0);
}
-void MipsAssembler::DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2) {
- DsFsmInstr(instruction, (1u << out), (1u << in1) | (1u << in2), 0, 0, 0, 0);
+void MipsAssembler::DsFsmInstrRrr(uint32_t instruction,
+ Register out,
+ Register in1,
+ Register in2,
+ MipsLabel* patcher_label) {
+ DsFsmInstr(instruction, (1u << out), (1u << in1) | (1u << in2), 0, 0, 0, 0, patcher_label);
}
void MipsAssembler::DsFsmInstrRrrr(uint32_t instruction,
@@ -310,8 +317,8 @@ void MipsAssembler::EmitBranches() {
// Switch from appending instructions at the end of the buffer to overwriting
// existing instructions (branch placeholders) in the buffer.
overwriting_ = true;
- for (auto& branch : branches_) {
- EmitBranch(&branch);
+ for (size_t id = 0; id < branches_.size(); id++) {
+ EmitBranch(id);
}
overwriting_ = false;
}
@@ -531,8 +538,15 @@ void MipsAssembler::Addu(Register rd, Register rs, Register rt) {
DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x21), rd, rs, rt);
}
+void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) {
+ if (patcher_label != nullptr) {
+ Bind(patcher_label);
+ }
+ DsFsmInstrRrr(EmitI(0x9, rs, rt, imm16), rt, rs, rs, patcher_label);
+}
+
void MipsAssembler::Addiu(Register rt, Register rs, uint16_t imm16) {
- DsFsmInstrRrr(EmitI(0x9, rs, rt, imm16), rt, rs, rs);
+ Addiu(rt, rs, imm16, /* patcher_label */ nullptr);
}
void MipsAssembler::Subu(Register rd, Register rs, Register rt) {
@@ -791,8 +805,15 @@ void MipsAssembler::Lh(Register rt, Register rs, uint16_t imm16) {
DsFsmInstrRrr(EmitI(0x21, rs, rt, imm16), rt, rs, rs);
}
+void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) {
+ if (patcher_label != nullptr) {
+ Bind(patcher_label);
+ }
+ DsFsmInstrRrr(EmitI(0x23, rs, rt, imm16), rt, rs, rs, patcher_label);
+}
+
void MipsAssembler::Lw(Register rt, Register rs, uint16_t imm16) {
- DsFsmInstrRrr(EmitI(0x23, rs, rt, imm16), rt, rs, rs);
+ Lw(rt, rs, imm16, /* patcher_label */ nullptr);
}
void MipsAssembler::Lwl(Register rt, Register rs, uint16_t imm16) {
@@ -866,8 +887,15 @@ void MipsAssembler::Sh(Register rt, Register rs, uint16_t imm16) {
DsFsmInstrRrr(EmitI(0x29, rs, rt, imm16), ZERO, rt, rs);
}
+void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label) {
+ if (patcher_label != nullptr) {
+ Bind(patcher_label);
+ }
+ DsFsmInstrRrr(EmitI(0x2b, rs, rt, imm16), ZERO, rt, rs, patcher_label);
+}
+
void MipsAssembler::Sw(Register rt, Register rs, uint16_t imm16) {
- DsFsmInstrRrr(EmitI(0x2b, rs, rt, imm16), ZERO, rt, rs);
+ Sw(rt, rs, imm16, /* patcher_label */ nullptr);
}
void MipsAssembler::Swl(Register rt, Register rs, uint16_t imm16) {
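A hypothetical call sequence (sketch) showing how a relative patcher could use the new overloads; registers are illustrative, and 0x5678 is the placeholder convention referenced later in this patch:

// Emit a patchable load; the bound label tracks the instruction even if the
// assembler later moves it into a branch delay slot.
MipsLabel patch_label;
assembler.Lw(mips::V0, mips::A0, /* placeholder low16 */ 0x5678, &patch_label);
// After FinalizeCode(), the patcher reads the final instruction location:
uint32_t patch_offset = assembler.GetLabelLocation(&patch_label);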
@@ -935,11 +963,11 @@ void MipsAssembler::Bne(Register rs, Register rt, uint16_t imm16) {
}
void MipsAssembler::Beqz(Register rt, uint16_t imm16) {
- Beq(ZERO, rt, imm16);
+ Beq(rt, ZERO, imm16);
}
void MipsAssembler::Bnez(Register rt, uint16_t imm16) {
- Bne(ZERO, rt, imm16);
+ Bne(rt, ZERO, imm16);
}
void MipsAssembler::Bltz(Register rt, uint16_t imm16) {
@@ -991,6 +1019,7 @@ void MipsAssembler::Jal(uint32_t addr26) {
void MipsAssembler::Jalr(Register rd, Register rs) {
uint32_t last_instruction = delay_slot_.instruction_;
+ MipsLabel* patcher_label = delay_slot_.patcher_label_;
bool exchange = (last_instruction != 0 &&
(delay_slot_.gpr_outs_mask_ & (1u << rs)) == 0 &&
((delay_slot_.gpr_ins_mask_ | delay_slot_.gpr_outs_mask_) & (1u << rd)) == 0);
@@ -1011,6 +1040,10 @@ void MipsAssembler::Jalr(Register rd, Register rs) {
CHECK_EQ(instr1, last_instruction);
buffer_.Store<uint32_t>(pos1, instr2);
buffer_.Store<uint32_t>(pos2, instr1);
+ // Move the patcher label along with the patched instruction.
+ if (patcher_label != nullptr) {
+ patcher_label->AdjustBoundPosition(sizeof(uint32_t));
+ }
} else if (reordering_) {
Nop();
}
@@ -3118,7 +3151,7 @@ void MipsAssembler::Branch::InitShortOrLong(MipsAssembler::Branch::OffsetBits of
}
void MipsAssembler::Branch::InitializeType(Type initial_type, bool is_r6) {
- OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
+ OffsetBits offset_size_needed = GetOffsetSizeNeeded(location_, target_);
if (is_r6) {
// R6
switch (initial_type) {
@@ -3131,23 +3164,31 @@ void MipsAssembler::Branch::InitializeType(Type initial_type, bool is_r6) {
type_ = kR6Literal;
break;
case kCall:
- InitShortOrLong(offset_size, kR6Call, kR6LongCall);
+ InitShortOrLong(offset_size_needed, kR6Call, kR6LongCall);
break;
case kCondBranch:
switch (condition_) {
case kUncond:
- InitShortOrLong(offset_size, kR6UncondBranch, kR6LongUncondBranch);
+ InitShortOrLong(offset_size_needed, kR6UncondBranch, kR6LongUncondBranch);
break;
case kCondEQZ:
case kCondNEZ:
// Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
- type_ = (offset_size <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
+ type_ = (offset_size_needed <= kOffset23) ? kR6CondBranch : kR6LongCondBranch;
break;
default:
- InitShortOrLong(offset_size, kR6CondBranch, kR6LongCondBranch);
+ InitShortOrLong(offset_size_needed, kR6CondBranch, kR6LongCondBranch);
break;
}
break;
+ case kBareCall:
+ type_ = kR6BareCall;
+ CHECK_LE(offset_size_needed, GetOffsetSize());
+ break;
+ case kBareCondBranch:
+ type_ = (condition_ == kUncond) ? kR6BareUncondBranch : kR6BareCondBranch;
+ CHECK_LE(offset_size_needed, GetOffsetSize());
+ break;
default:
LOG(FATAL) << "Unexpected branch type " << initial_type;
UNREACHABLE();
@@ -3164,18 +3205,26 @@ void MipsAssembler::Branch::InitializeType(Type initial_type, bool is_r6) {
type_ = kLiteral;
break;
case kCall:
- InitShortOrLong(offset_size, kCall, kLongCall);
+ InitShortOrLong(offset_size_needed, kCall, kLongCall);
break;
case kCondBranch:
switch (condition_) {
case kUncond:
- InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
+ InitShortOrLong(offset_size_needed, kUncondBranch, kLongUncondBranch);
break;
default:
- InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
+ InitShortOrLong(offset_size_needed, kCondBranch, kLongCondBranch);
break;
}
break;
+ case kBareCall:
+ type_ = kBareCall;
+ CHECK_LE(offset_size_needed, GetOffsetSize());
+ break;
+ case kBareCondBranch:
+ type_ = (condition_ == kUncond) ? kBareUncondBranch : kBareCondBranch;
+ CHECK_LE(offset_size_needed, GetOffsetSize());
+ break;
default:
LOG(FATAL) << "Unexpected branch type " << initial_type;
UNREACHABLE();
@@ -3210,15 +3259,22 @@ bool MipsAssembler::Branch::IsUncond(BranchCondition condition, Register lhs, Re
}
}
-MipsAssembler::Branch::Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call)
+MipsAssembler::Branch::Branch(bool is_r6,
+ uint32_t location,
+ uint32_t target,
+ bool is_call,
+ bool is_bare)
: old_location_(location),
location_(location),
target_(target),
lhs_reg_(0),
rhs_reg_(0),
condition_(kUncond),
- delayed_instruction_(kUnfilledDelaySlot) {
- InitializeType((is_call ? kCall : kCondBranch), is_r6);
+ delayed_instruction_(kUnfilledDelaySlot),
+ patcher_label_(nullptr) {
+ InitializeType(
+ (is_call ? (is_bare ? kBareCall : kCall) : (is_bare ? kBareCondBranch : kCondBranch)),
+ is_r6);
}
MipsAssembler::Branch::Branch(bool is_r6,
@@ -3226,14 +3282,16 @@ MipsAssembler::Branch::Branch(bool is_r6,
uint32_t target,
MipsAssembler::BranchCondition condition,
Register lhs_reg,
- Register rhs_reg)
+ Register rhs_reg,
+ bool is_bare)
: old_location_(location),
location_(location),
target_(target),
lhs_reg_(lhs_reg),
rhs_reg_(rhs_reg),
condition_(condition),
- delayed_instruction_(kUnfilledDelaySlot) {
+ delayed_instruction_(kUnfilledDelaySlot),
+ patcher_label_(nullptr) {
CHECK_NE(condition, kUncond);
switch (condition) {
case kCondLT:
@@ -3276,7 +3334,7 @@ MipsAssembler::Branch::Branch(bool is_r6,
// Branch condition is always true, make the branch unconditional.
condition_ = kUncond;
}
- InitializeType(kCondBranch, is_r6);
+ InitializeType((is_bare ? kBareCondBranch : kCondBranch), is_r6);
}
MipsAssembler::Branch::Branch(bool is_r6,
@@ -3290,7 +3348,8 @@ MipsAssembler::Branch::Branch(bool is_r6,
lhs_reg_(dest_reg),
rhs_reg_(base_reg),
condition_(kUncond),
- delayed_instruction_(kUnfilledDelaySlot) {
+ delayed_instruction_(kUnfilledDelaySlot),
+ patcher_label_(nullptr) {
CHECK_NE(dest_reg, ZERO);
if (is_r6) {
CHECK_EQ(base_reg, ZERO);
@@ -3419,20 +3478,44 @@ uint32_t MipsAssembler::Branch::GetOldEndLocation() const {
return GetOldLocation() + GetOldSize();
}
+bool MipsAssembler::Branch::IsBare() const {
+ switch (type_) {
+ // R2 short branches (can't be promoted to long), delay slots filled manually.
+ case kBareUncondBranch:
+ case kBareCondBranch:
+ case kBareCall:
+ // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+ case kR6BareUncondBranch:
+ case kR6BareCondBranch:
+ case kR6BareCall:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool MipsAssembler::Branch::IsLong() const {
switch (type_) {
- // R2 short branches.
+ // R2 short branches (can be promoted to long).
case kUncondBranch:
case kCondBranch:
case kCall:
+ // R2 short branches (can't be promoted to long), delay slots filled manually.
+ case kBareUncondBranch:
+ case kBareCondBranch:
+ case kBareCall:
// R2 near label.
case kLabel:
// R2 near literal.
case kLiteral:
- // R6 short branches.
+ // R6 short branches (can be promoted to long).
case kR6UncondBranch:
case kR6CondBranch:
case kR6Call:
+ // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+ case kR6BareUncondBranch:
+ case kR6BareCondBranch:
+ case kR6BareCall:
// R6 near label.
case kR6Label:
// R6 near literal.
@@ -3464,8 +3547,9 @@ bool MipsAssembler::Branch::IsResolved() const {
}
MipsAssembler::Branch::OffsetBits MipsAssembler::Branch::GetOffsetSize() const {
+ bool r6_cond_branch = (type_ == kR6CondBranch || type_ == kR6BareCondBranch);
OffsetBits offset_size =
- (type_ == kR6CondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
+ (r6_cond_branch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
? kOffset23
: branch_info_[type_].offset_size;
return offset_size;
@@ -3511,8 +3595,9 @@ void MipsAssembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) {
}
void MipsAssembler::Branch::PromoteToLong() {
+ CHECK(!IsBare()); // Bare branches do not promote.
switch (type_) {
- // R2 short branches.
+ // R2 short branches (can be promoted to long).
case kUncondBranch:
type_ = kLongUncondBranch;
break;
@@ -3530,7 +3615,7 @@ void MipsAssembler::Branch::PromoteToLong() {
case kLiteral:
type_ = kFarLiteral;
break;
- // R6 short branches.
+ // R6 short branches (can be promoted to long).
case kR6UncondBranch:
type_ = kR6LongUncondBranch;
break;
@@ -3585,7 +3670,7 @@ uint32_t MipsAssembler::Branch::PromoteIfNeeded(uint32_t location, uint32_t max_
}
// The following logic is for debugging/testing purposes.
// Promote some short branches to long when it's not really required.
- if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {
+ if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max() && !IsBare())) {
int64_t distance = static_cast<int64_t>(target_) - location;
distance = (distance >= 0) ? distance : -distance;
if (distance >= max_short_distance) {
@@ -3641,6 +3726,17 @@ const MipsAssembler::Branch* MipsAssembler::GetBranch(uint32_t branch_id) const
return &branches_[branch_id];
}
+void MipsAssembler::BindRelativeToPrecedingBranch(MipsLabel* label,
+ uint32_t prev_branch_id_plus_one,
+ uint32_t position) {
+ if (prev_branch_id_plus_one != 0) {
+ const Branch* branch = GetBranch(prev_branch_id_plus_one - 1);
+ position -= branch->GetEndLocation();
+ }
+ label->prev_branch_id_plus_one_ = prev_branch_id_plus_one;
+ label->BindTo(position);
+}
+
void MipsAssembler::Bind(MipsLabel* label) {
CHECK(!label->IsBound());
uint32_t bound_pc = buffer_.Size();
@@ -3666,22 +3762,15 @@ void MipsAssembler::Bind(MipsLabel* label) {
// Now make the label object contain its own location (relative to the end of the preceding
// branch, if any; it will be used by the branches referring to and following this label).
- label->prev_branch_id_plus_one_ = branches_.size();
- if (label->prev_branch_id_plus_one_) {
- uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
- const Branch* branch = GetBranch(branch_id);
- bound_pc -= branch->GetEndLocation();
- }
- label->BindTo(bound_pc);
+ BindRelativeToPrecedingBranch(label, branches_.size(), bound_pc);
}
uint32_t MipsAssembler::GetLabelLocation(const MipsLabel* label) const {
CHECK(label->IsBound());
uint32_t target = label->Position();
- if (label->prev_branch_id_plus_one_) {
+ if (label->prev_branch_id_plus_one_ != 0) {
// Get label location based on the branch preceding it.
- uint32_t branch_id = label->prev_branch_id_plus_one_ - 1;
- const Branch* branch = GetBranch(branch_id);
+ const Branch* branch = GetBranch(label->prev_branch_id_plus_one_ - 1);
target += branch->GetEndLocation();
}
return target;
@@ -3823,10 +3912,15 @@ uint32_t MipsAssembler::Branch::GetDelayedInstruction() const {
return delayed_instruction_;
}
-void MipsAssembler::Branch::SetDelayedInstruction(uint32_t instruction) {
+MipsLabel* MipsAssembler::Branch::GetPatcherLabel() const {
+ return patcher_label_;
+}
+
+void MipsAssembler::Branch::SetDelayedInstruction(uint32_t instruction, MipsLabel* patcher_label) {
CHECK_NE(instruction, kUnfilledDelaySlot);
CHECK_EQ(delayed_instruction_, kUnfilledDelaySlot);
delayed_instruction_ = instruction;
+ patcher_label_ = patcher_label;
}
void MipsAssembler::Branch::DecrementLocations() {
@@ -3851,6 +3945,10 @@ void MipsAssembler::Branch::DecrementLocations() {
}
void MipsAssembler::MoveInstructionToDelaySlot(Branch& branch) {
+ if (branch.IsBare()) {
+ // Delay slots are filled manually in bare branches.
+ return;
+ }
if (branch.CanHaveDelayedInstruction(delay_slot_)) {
// The last instruction cannot be used in a different delay slot,
// do not commit the label before it (if any).
@@ -3863,34 +3961,39 @@ void MipsAssembler::MoveInstructionToDelaySlot(Branch& branch) {
buffer_.Resize(size);
// Attach it to the branch and adjust the branch locations.
branch.DecrementLocations();
- branch.SetDelayedInstruction(delay_slot_.instruction_);
+ branch.SetDelayedInstruction(delay_slot_.instruction_, delay_slot_.patcher_label_);
} else if (!reordering_ && branch.GetType() == Branch::kUncondBranch) {
// If reordering is disabled, prevent absorption of the target instruction.
branch.SetDelayedInstruction(Branch::kUnfillableDelaySlot);
}
}
-void MipsAssembler::Buncond(MipsLabel* label) {
+void MipsAssembler::Buncond(MipsLabel* label, bool is_r6, bool is_bare) {
uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
- branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ false);
+ branches_.emplace_back(is_r6, buffer_.Size(), target, /* is_call */ false, is_bare);
MoveInstructionToDelaySlot(branches_.back());
FinalizeLabeledBranch(label);
}
-void MipsAssembler::Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs) {
+void MipsAssembler::Bcond(MipsLabel* label,
+ bool is_r6,
+ bool is_bare,
+ BranchCondition condition,
+ Register lhs,
+ Register rhs) {
// If lhs = rhs, this can be a NOP.
if (Branch::IsNop(condition, lhs, rhs)) {
return;
}
uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
- branches_.emplace_back(IsR6(), buffer_.Size(), target, condition, lhs, rhs);
+ branches_.emplace_back(is_r6, buffer_.Size(), target, condition, lhs, rhs, is_bare);
MoveInstructionToDelaySlot(branches_.back());
FinalizeLabeledBranch(label);
}
-void MipsAssembler::Call(MipsLabel* label) {
+void MipsAssembler::Call(MipsLabel* label, bool is_r6, bool is_bare) {
uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
- branches_.emplace_back(IsR6(), buffer_.Size(), target, /* is_call */ true);
+ branches_.emplace_back(is_r6, buffer_.Size(), target, /* is_call */ true, is_bare);
MoveInstructionToDelaySlot(branches_.back());
FinalizeLabeledBranch(label);
}
@@ -4038,10 +4141,14 @@ void MipsAssembler::PromoteBranches() {
// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] = {
- // R2 short branches.
+ // R2 short branches (can be promoted to long).
{ 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kUncondBranch
{ 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCondBranch
{ 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kCall
+ // R2 short branches (can't be promoted to long), delay slots filled manually.
+ { 1, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kBareUncondBranch
+ { 1, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kBareCondBranch
+ { 1, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kBareCall
// R2 near label.
{ 1, 0, 0, MipsAssembler::Branch::kOffset16, 0 }, // kLabel
// R2 near literal.
@@ -4054,11 +4161,16 @@ const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] =
{ 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kFarLabel
// R2 far literal.
{ 3, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kFarLiteral
- // R6 short branches.
+ // R6 short branches (can be promoted to long).
{ 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6UncondBranch
{ 2, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kR6CondBranch
// Exception: kOffset23 for beqzc/bnezc.
{ 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6Call
+ // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+ { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6BareUncondBranch
+ { 1, 0, 1, MipsAssembler::Branch::kOffset18, 2 }, // kR6BareCondBranch
+ // Exception: kOffset23 for beqzc/bnezc.
+ { 1, 0, 1, MipsAssembler::Branch::kOffset28, 2 }, // kR6BareCall
// R6 near label.
{ 1, 0, 0, MipsAssembler::Branch::kOffset21, 2 }, // kR6Label
// R6 near literal.
@@ -4073,15 +4185,49 @@ const MipsAssembler::Branch::BranchInfo MipsAssembler::Branch::branch_info_[] =
{ 2, 0, 0, MipsAssembler::Branch::kOffset32, 0 }, // kR6FarLiteral
};
+static inline bool IsAbsorbableInstruction(uint32_t instruction) {
+ // The relative patcher patches addiu, lw and sw with an immediate operand of 0x5678.
+ // We want to make sure that these instructions do not get absorbed into delay slots
+ // of unconditional branches on R2. Absorption would otherwise make copies of
+ // unpatched instructions.
+ if ((instruction & 0xFFFF) != 0x5678) {
+ return true;
+ }
+ switch (instruction >> kOpcodeShift) {
+ case 0x09: // Addiu.
+ case 0x23: // Lw.
+ case 0x2B: // Sw.
+ return false;
+ default:
+ return true;
+ }
+}
+
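A worked encoding example (not in the patch) of why this test suffices:

// addiu $t0, $s0, 0x5678 encodes as (0x09 << 26) | (16 << 21) | (8 << 16) | 0x5678,
// so (instruction & 0xFFFF) == 0x5678 and (instruction >> kOpcodeShift) == 0x09:
// the instruction is recognized as patchable and is not absorbed, since copying
// it into a delay slot would duplicate a not-yet-patched instruction. Any other
// instruction whose low 16 bits merely happen to be 0x5678 remains absorbable.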
// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
-void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) {
+void MipsAssembler::EmitBranch(uint32_t branch_id) {
CHECK_EQ(overwriting_, true);
+ Branch* branch = GetBranch(branch_id);
overwrite_location_ = branch->GetLocation();
uint32_t offset = branch->GetOffset(GetBranchOrPcRelBaseForEncoding(branch));
BranchCondition condition = branch->GetCondition();
Register lhs = branch->GetLeftRegister();
Register rhs = branch->GetRightRegister();
uint32_t delayed_instruction = branch->GetDelayedInstruction();
+ MipsLabel* patcher_label = branch->GetPatcherLabel();
+ if (patcher_label != nullptr) {
+ // Update the patcher label location to account for branch promotion and
+ // delay slot filling.
+ CHECK(patcher_label->IsBound());
+ uint32_t bound_pc = branch->GetLocation();
+ if (!branch->IsLong()) {
+ // Short branches precede delay slots.
+ // Long branches follow "delay slots".
+ bound_pc += sizeof(uint32_t);
+ }
+ // Rebind the label.
+ patcher_label->Reinitialize();
+ BindRelativeToPrecedingBranch(patcher_label, branch_id, bound_pc);
+ }
switch (branch->GetType()) {
// R2 short branches.
case Branch::kUncondBranch:
@@ -4097,8 +4243,11 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) {
if (offset != 0x7FFF) {
uint32_t target = branch->GetTarget();
if (std::binary_search(ds_fsm_target_pcs_.begin(), ds_fsm_target_pcs_.end(), target)) {
- delayed_instruction = buffer_.Load<uint32_t>(target);
- offset++;
+ uint32_t target_instruction = buffer_.Load<uint32_t>(target);
+ if (IsAbsorbableInstruction(target_instruction)) {
+ delayed_instruction = target_instruction;
+ offset++;
+ }
}
}
}
@@ -4124,6 +4273,21 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) {
Bal(offset);
Emit(delayed_instruction);
break;
+ case Branch::kBareUncondBranch:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ B(offset);
+ break;
+ case Branch::kBareCondBranch:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ EmitBcondR2(condition, lhs, rhs, offset);
+ break;
+ case Branch::kBareCall:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Bal(offset);
+ break;
// R2 near label.
case Branch::kLabel:
@@ -4249,6 +4413,21 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) {
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Balc(offset);
break;
+ case Branch::kR6BareUncondBranch:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Bc(offset);
+ break;
+ case Branch::kR6BareCondBranch:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ EmitBcondR6(condition, lhs, rhs, offset);
+ break;
+ case Branch::kR6BareCall:
+ DCHECK_EQ(delayed_instruction, Branch::kUnfilledDelaySlot);
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Balc(offset);
+ break;
// R6 near label.
case Branch::kR6Label:
@@ -4309,46 +4488,51 @@ void MipsAssembler::EmitBranch(MipsAssembler::Branch* branch) {
}
CHECK_EQ(overwrite_location_, branch->GetEndLocation());
CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize));
+ if (patcher_label != nullptr) {
+ // The patched instruction should look like one.
+ uint32_t patched_instruction = buffer_.Load<uint32_t>(GetLabelLocation(patcher_label));
+ CHECK(!IsAbsorbableInstruction(patched_instruction));
+ }
}
-void MipsAssembler::B(MipsLabel* label) {
- Buncond(label);
+void MipsAssembler::B(MipsLabel* label, bool is_bare) {
+ Buncond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare);
}
-void MipsAssembler::Bal(MipsLabel* label) {
- Call(label);
+void MipsAssembler::Bal(MipsLabel* label, bool is_bare) {
+ Call(label, /* is_r6 */ (IsR6() && !is_bare), is_bare);
}
-void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label) {
- Bcond(label, kCondEQ, rs, rt);
+void MipsAssembler::Beq(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondEQ, rs, rt);
}
-void MipsAssembler::Bne(Register rs, Register rt, MipsLabel* label) {
- Bcond(label, kCondNE, rs, rt);
+void MipsAssembler::Bne(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondNE, rs, rt);
}
-void MipsAssembler::Beqz(Register rt, MipsLabel* label) {
- Bcond(label, kCondEQZ, rt);
+void MipsAssembler::Beqz(Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondEQZ, rt);
}
-void MipsAssembler::Bnez(Register rt, MipsLabel* label) {
- Bcond(label, kCondNEZ, rt);
+void MipsAssembler::Bnez(Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondNEZ, rt);
}
-void MipsAssembler::Bltz(Register rt, MipsLabel* label) {
- Bcond(label, kCondLTZ, rt);
+void MipsAssembler::Bltz(Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondLTZ, rt);
}
-void MipsAssembler::Bgez(Register rt, MipsLabel* label) {
- Bcond(label, kCondGEZ, rt);
+void MipsAssembler::Bgez(Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondGEZ, rt);
}
-void MipsAssembler::Blez(Register rt, MipsLabel* label) {
- Bcond(label, kCondLEZ, rt);
+void MipsAssembler::Blez(Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondLEZ, rt);
}
-void MipsAssembler::Bgtz(Register rt, MipsLabel* label) {
- Bcond(label, kCondGTZ, rt);
+void MipsAssembler::Bgtz(Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ (IsR6() && !is_bare), is_bare, kCondGTZ, rt);
}
bool MipsAssembler::CanExchangeWithSlt(Register rs, Register rt) const {
@@ -4399,74 +4583,130 @@ void MipsAssembler::GenerateSltForCondBranch(bool unsigned_slt, Register rs, Reg
}
}
-void MipsAssembler::Blt(Register rs, Register rt, MipsLabel* label) {
- if (IsR6()) {
- Bcond(label, kCondLT, rs, rt);
+void MipsAssembler::Blt(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+ if (IsR6() && !is_bare) {
+ Bcond(label, IsR6(), is_bare, kCondLT, rs, rt);
} else if (!Branch::IsNop(kCondLT, rs, rt)) {
// Synthesize the instruction (not available on R2).
GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt);
- Bnez(AT, label);
+ Bnez(AT, label, is_bare);
}
}
-void MipsAssembler::Bge(Register rs, Register rt, MipsLabel* label) {
- if (IsR6()) {
- Bcond(label, kCondGE, rs, rt);
+void MipsAssembler::Bge(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+ if (IsR6() && !is_bare) {
+ Bcond(label, IsR6(), is_bare, kCondGE, rs, rt);
} else if (Branch::IsUncond(kCondGE, rs, rt)) {
- B(label);
+ B(label, is_bare);
} else {
// Synthesize the instruction (not available on R2).
GenerateSltForCondBranch(/* unsigned_slt */ false, rs, rt);
- Beqz(AT, label);
+ Beqz(AT, label, is_bare);
}
}
-void MipsAssembler::Bltu(Register rs, Register rt, MipsLabel* label) {
- if (IsR6()) {
- Bcond(label, kCondLTU, rs, rt);
+void MipsAssembler::Bltu(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+ if (IsR6() && !is_bare) {
+ Bcond(label, IsR6(), is_bare, kCondLTU, rs, rt);
} else if (!Branch::IsNop(kCondLTU, rs, rt)) {
// Synthesize the instruction (not available on R2).
GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt);
- Bnez(AT, label);
+ Bnez(AT, label, is_bare);
}
}
-void MipsAssembler::Bgeu(Register rs, Register rt, MipsLabel* label) {
- if (IsR6()) {
- Bcond(label, kCondGEU, rs, rt);
+void MipsAssembler::Bgeu(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+ if (IsR6() && !is_bare) {
+ Bcond(label, IsR6(), is_bare, kCondGEU, rs, rt);
} else if (Branch::IsUncond(kCondGEU, rs, rt)) {
- B(label);
+ B(label, is_bare);
} else {
// Synthesize the instruction (not available on R2).
GenerateSltForCondBranch(/* unsigned_slt */ true, rs, rt);
- Beqz(AT, label);
+ Beqz(AT, label, is_bare);
}
}
-void MipsAssembler::Bc1f(MipsLabel* label) {
- Bc1f(0, label);
+void MipsAssembler::Bc1f(MipsLabel* label, bool is_bare) {
+ Bc1f(0, label, is_bare);
}
-void MipsAssembler::Bc1f(int cc, MipsLabel* label) {
+void MipsAssembler::Bc1f(int cc, MipsLabel* label, bool is_bare) {
CHECK(IsUint<3>(cc)) << cc;
- Bcond(label, kCondF, static_cast<Register>(cc), ZERO);
+ Bcond(label, /* is_r6 */ false, is_bare, kCondF, static_cast<Register>(cc), ZERO);
}
-void MipsAssembler::Bc1t(MipsLabel* label) {
- Bc1t(0, label);
+void MipsAssembler::Bc1t(MipsLabel* label, bool is_bare) {
+ Bc1t(0, label, is_bare);
}
-void MipsAssembler::Bc1t(int cc, MipsLabel* label) {
+void MipsAssembler::Bc1t(int cc, MipsLabel* label, bool is_bare) {
CHECK(IsUint<3>(cc)) << cc;
- Bcond(label, kCondT, static_cast<Register>(cc), ZERO);
+ Bcond(label, /* is_r6 */ false, is_bare, kCondT, static_cast<Register>(cc), ZERO);
+}
+
+void MipsAssembler::Bc(MipsLabel* label, bool is_bare) {
+ Buncond(label, /* is_r6 */ true, is_bare);
+}
+
+void MipsAssembler::Balc(MipsLabel* label, bool is_bare) {
+ Call(label, /* is_r6 */ true, is_bare);
+}
+
+void MipsAssembler::Beqc(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondEQ, rs, rt);
+}
+
+void MipsAssembler::Bnec(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondNE, rs, rt);
+}
+
+void MipsAssembler::Beqzc(Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondEQZ, rt);
+}
+
+void MipsAssembler::Bnezc(Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondNEZ, rt);
+}
+
+void MipsAssembler::Bltzc(Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondLTZ, rt);
+}
+
+void MipsAssembler::Bgezc(Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondGEZ, rt);
+}
+
+void MipsAssembler::Blezc(Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondLEZ, rt);
+}
+
+void MipsAssembler::Bgtzc(Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondGTZ, rt);
+}
+
+void MipsAssembler::Bltc(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondLT, rs, rt);
+}
+
+void MipsAssembler::Bgec(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondGE, rs, rt);
+}
+
+void MipsAssembler::Bltuc(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondLTU, rs, rt);
+}
+
+void MipsAssembler::Bgeuc(Register rs, Register rt, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondGEU, rs, rt);
}
-void MipsAssembler::Bc1eqz(FRegister ft, MipsLabel* label) {
- Bcond(label, kCondF, static_cast<Register>(ft), ZERO);
+void MipsAssembler::Bc1eqz(FRegister ft, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondF, static_cast<Register>(ft), ZERO);
}
-void MipsAssembler::Bc1nez(FRegister ft, MipsLabel* label) {
- Bcond(label, kCondT, static_cast<Register>(ft), ZERO);
+void MipsAssembler::Bc1nez(FRegister ft, MipsLabel* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondT, static_cast<Register>(ft), ZERO);
}
void MipsAssembler::AdjustBaseAndOffset(Register& base,
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index a7ff931e7e..0f163ac83f 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -80,6 +80,12 @@ class MipsLabel : public Label {
MipsLabel(MipsLabel&& src)
: Label(std::move(src)), prev_branch_id_plus_one_(src.prev_branch_id_plus_one_) {}
+ void AdjustBoundPosition(int delta) {
+ CHECK(IsBound());
+ // A bound label's position is stored negated, so advancing it means decrementing.
+ position_ -= delta;
+ }
+
private:
uint32_t prev_branch_id_plus_one_; // To get distance from preceding branch, if any.
@@ -215,6 +221,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
// Emit Machine Instructions.
void Addu(Register rd, Register rs, Register rt);
+ void Addiu(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label);
void Addiu(Register rt, Register rs, uint16_t imm16);
void Subu(Register rd, Register rs, Register rt);
@@ -272,6 +279,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
void Lb(Register rt, Register rs, uint16_t imm16);
void Lh(Register rt, Register rs, uint16_t imm16);
+ void Lw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label);
void Lw(Register rt, Register rs, uint16_t imm16);
void Lwl(Register rt, Register rs, uint16_t imm16);
void Lwr(Register rt, Register rs, uint16_t imm16);
@@ -287,6 +295,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
void Sb(Register rt, Register rs, uint16_t imm16);
void Sh(Register rt, Register rs, uint16_t imm16);
+ void Sw(Register rt, Register rs, uint16_t imm16, MipsLabel* patcher_label);
void Sw(Register rt, Register rs, uint16_t imm16);
void Swl(Register rt, Register rs, uint16_t imm16);
void Swr(Register rt, Register rs, uint16_t imm16);
@@ -636,29 +645,69 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
void LoadSConst32(FRegister r, int32_t value, Register temp);
void Addiu32(Register rt, Register rs, int32_t value, Register rtmp = AT);
- // These will generate R2 branches or R6 branches as appropriate and take care of
- // the delay/forbidden slots.
void Bind(MipsLabel* label);
- void B(MipsLabel* label);
- void Bal(MipsLabel* label);
- void Beq(Register rs, Register rt, MipsLabel* label);
- void Bne(Register rs, Register rt, MipsLabel* label);
- void Beqz(Register rt, MipsLabel* label);
- void Bnez(Register rt, MipsLabel* label);
- void Bltz(Register rt, MipsLabel* label);
- void Bgez(Register rt, MipsLabel* label);
- void Blez(Register rt, MipsLabel* label);
- void Bgtz(Register rt, MipsLabel* label);
- void Blt(Register rs, Register rt, MipsLabel* label);
- void Bge(Register rs, Register rt, MipsLabel* label);
- void Bltu(Register rs, Register rt, MipsLabel* label);
- void Bgeu(Register rs, Register rt, MipsLabel* label);
- void Bc1f(MipsLabel* label); // R2
- void Bc1f(int cc, MipsLabel* label); // R2
- void Bc1t(MipsLabel* label); // R2
- void Bc1t(int cc, MipsLabel* label); // R2
- void Bc1eqz(FRegister ft, MipsLabel* label); // R6
- void Bc1nez(FRegister ft, MipsLabel* label); // R6
+ // When `is_bare` is false, the branches will promote to long (if the range
+ // of the individual branch instruction is insufficient) and the delay/
+ // forbidden slots will be taken care of.
+ // Use `is_bare = false` when the branch target may be out of reach of the
+ // individual branch instruction. In other words, this is for general-purpose use.
+ //
+ // When `is_bare` is true, just the branch instructions will be generated
+ // leaving delay/forbidden slot filling up to the caller and the branches
+ // won't promote to long if the range is insufficient (you'll get a
+ // compilation error when the range is exceeded).
+ // Use `is_bare = true` when the branch target is known to be within reach
+ // of the individual branch instruction. This is intended for small local
+ // optimizations around delay/forbidden slots.
+ // Also prefer using `is_bare = true` if the code near the branch is to be
+ // patched or analyzed at run time (e.g. introspection) to
+ // - show the intent and
+ // - fail during compilation rather than during patching/execution if the
+ // bare branch range is insufficient but the code size and layout are
+ // expected to remain unchanged.
+ //
+ // R2 branches with delay slots that are also available on R6.
+ // On R6 when `is_bare` is false these convert to equivalent R6 compact
+ // branches (to reduce code size). On R2 or when `is_bare` is true they
+ // remain R2 branches with delay slots.
+ void B(MipsLabel* label, bool is_bare = false);
+ void Bal(MipsLabel* label, bool is_bare = false);
+ void Beq(Register rs, Register rt, MipsLabel* label, bool is_bare = false);
+ void Bne(Register rs, Register rt, MipsLabel* label, bool is_bare = false);
+ void Beqz(Register rt, MipsLabel* label, bool is_bare = false);
+ void Bnez(Register rt, MipsLabel* label, bool is_bare = false);
+ void Bltz(Register rt, MipsLabel* label, bool is_bare = false);
+ void Bgez(Register rt, MipsLabel* label, bool is_bare = false);
+ void Blez(Register rt, MipsLabel* label, bool is_bare = false);
+ void Bgtz(Register rt, MipsLabel* label, bool is_bare = false);
+ void Blt(Register rs, Register rt, MipsLabel* label, bool is_bare = false);
+ void Bge(Register rs, Register rt, MipsLabel* label, bool is_bare = false);
+ void Bltu(Register rs, Register rt, MipsLabel* label, bool is_bare = false);
+ void Bgeu(Register rs, Register rt, MipsLabel* label, bool is_bare = false);
+ // R2-only branches with delay slots.
+ void Bc1f(MipsLabel* label, bool is_bare = false); // R2
+ void Bc1f(int cc, MipsLabel* label, bool is_bare = false); // R2
+ void Bc1t(MipsLabel* label, bool is_bare = false); // R2
+ void Bc1t(int cc, MipsLabel* label, bool is_bare = false); // R2
+ // R6-only compact branches without delay/forbidden slots.
+ void Bc(MipsLabel* label, bool is_bare = false); // R6
+ void Balc(MipsLabel* label, bool is_bare = false); // R6
+ // R6-only compact branches with forbidden slots.
+ void Beqc(Register rs, Register rt, MipsLabel* label, bool is_bare = false); // R6
+ void Bnec(Register rs, Register rt, MipsLabel* label, bool is_bare = false); // R6
+ void Beqzc(Register rt, MipsLabel* label, bool is_bare = false); // R6
+ void Bnezc(Register rt, MipsLabel* label, bool is_bare = false); // R6
+ void Bltzc(Register rt, MipsLabel* label, bool is_bare = false); // R6
+ void Bgezc(Register rt, MipsLabel* label, bool is_bare = false); // R6
+ void Blezc(Register rt, MipsLabel* label, bool is_bare = false); // R6
+ void Bgtzc(Register rt, MipsLabel* label, bool is_bare = false); // R6
+ void Bltc(Register rs, Register rt, MipsLabel* label, bool is_bare = false); // R6
+ void Bgec(Register rs, Register rt, MipsLabel* label, bool is_bare = false); // R6
+ void Bltuc(Register rs, Register rt, MipsLabel* label, bool is_bare = false); // R6
+ void Bgeuc(Register rs, Register rt, MipsLabel* label, bool is_bare = false); // R6
+ // R6-only branches with delay slots.
+ void Bc1eqz(FRegister ft, MipsLabel* label, bool is_bare = false); // R6
+ void Bc1nez(FRegister ft, MipsLabel* label, bool is_bare = false); // R6
void EmitLoad(ManagedRegister m_dst, Register src_register, int32_t src_offset, size_t size);
void AdjustBaseAndOffset(Register& base,
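A hypothetical use of a bare branch (sketch); reordering is disabled and the caller fills the delay slot itself:

assembler.SetReorder(false);
mips::MipsLabel done;
// Bare branch: never promoted to long; compilation fails (rather than the code
// layout silently changing) if `done` ends up out of range.
assembler.Bnez(mips::T0, &done, /* is_bare */ true);
assembler.Addu(mips::V0, mips::V0, mips::A0);  // Manually filled delay slot.
// ... small local code ...
assembler.Bind(&done);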
@@ -1248,6 +1297,9 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
uint32_t cc_ins_mask_;
// Branches never operate on the LO and HI registers, hence there's
// no mask for LO and HI.
+
+ // Label for patchable instructions to allow moving them into delay slots.
+ MipsLabel* patcher_label_;
};
// Delay slot finite state machine's (DS FSM's) state. The FSM state is updated
@@ -1268,10 +1320,14 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
class Branch {
public:
enum Type {
- // R2 short branches.
+ // R2 short branches (can be promoted to long).
kUncondBranch,
kCondBranch,
kCall,
+ // R2 short branches (can't be promoted to long), delay slots filled manually.
+ kBareUncondBranch,
+ kBareCondBranch,
+ kBareCall,
// R2 near label.
kLabel,
// R2 near literal.
@@ -1284,10 +1340,14 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
kFarLabel,
// R2 far literal.
kFarLiteral,
- // R6 short branches.
+ // R6 short branches (can be promoted to long).
kR6UncondBranch,
kR6CondBranch,
kR6Call,
+ // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+ kR6BareUncondBranch,
+ kR6BareCondBranch,
+ kR6BareCall,
// R6 near label.
kR6Label,
// R6 near literal.
@@ -1337,7 +1397,7 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
// instructions) from the instruction containing the offset.
uint32_t pc_org;
// How large (in bits) a PC-relative offset can be for a given type of branch (kR6CondBranch
- // is an exception: use kOffset23 for beqzc/bnezc).
+ // and kR6BareCondBranch are an exception: use kOffset23 for beqzc/bnezc).
OffsetBits offset_size;
// Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift
// count.
@@ -1346,14 +1406,15 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
static const BranchInfo branch_info_[/* Type */];
// Unconditional branch or call.
- Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call);
+ Branch(bool is_r6, uint32_t location, uint32_t target, bool is_call, bool is_bare);
// Conditional branch.
Branch(bool is_r6,
uint32_t location,
uint32_t target,
BranchCondition condition,
Register lhs_reg,
- Register rhs_reg);
+ Register rhs_reg,
+ bool is_bare);
// Label address (in literal area) or literal.
Branch(bool is_r6,
uint32_t location,
@@ -1385,13 +1446,15 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
uint32_t GetOldSize() const;
uint32_t GetEndLocation() const;
uint32_t GetOldEndLocation() const;
+ bool IsBare() const;
bool IsLong() const;
bool IsResolved() const;
// Various helpers for branch delay slot management.
bool CanHaveDelayedInstruction(const DelaySlot& delay_slot) const;
- void SetDelayedInstruction(uint32_t instruction);
+ void SetDelayedInstruction(uint32_t instruction, MipsLabel* patcher_label = nullptr);
uint32_t GetDelayedInstruction() const;
+ MipsLabel* GetPatcherLabel() const;
void DecrementLocations();
// Returns the bit size of the signed offset that the branch instruction can handle.
@@ -1476,6 +1539,8 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
// kUnfillableDelaySlot if none and unfillable
// (the latter is only used for unconditional R2
// branches).
+
+ MipsLabel* patcher_label_; // Patcher label for the instruction in the delay slot.
};
friend std::ostream& operator<<(std::ostream& os, const Branch::Type& rhs);
friend std::ostream& operator<<(std::ostream& os, const Branch::OffsetBits& rhs);
@@ -1513,9 +1578,14 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
VectorRegister wd,
int minor_opcode);
- void Buncond(MipsLabel* label);
- void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO);
- void Call(MipsLabel* label);
+ void Buncond(MipsLabel* label, bool is_r6, bool is_bare);
+ void Bcond(MipsLabel* label,
+ bool is_r6,
+ bool is_bare,
+ BranchCondition condition,
+ Register lhs,
+ Register rhs = ZERO);
+ void Call(MipsLabel* label, bool is_r6, bool is_bare);
void FinalizeLabeledBranch(MipsLabel* label);
// Various helpers for branch delay slot management.
@@ -1525,9 +1595,14 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
uint32_t fpr_outs_mask,
uint32_t fpr_ins_mask,
uint32_t cc_outs_mask,
- uint32_t cc_ins_mask);
+ uint32_t cc_ins_mask,
+ MipsLabel* patcher_label = nullptr);
void DsFsmInstrNop(uint32_t instruction);
- void DsFsmInstrRrr(uint32_t instruction, Register out, Register in1, Register in2);
+ void DsFsmInstrRrr(uint32_t instruction,
+ Register out,
+ Register in1,
+ Register in2,
+ MipsLabel* patcher_label = nullptr);
void DsFsmInstrRrrr(uint32_t instruction, Register in1_out, Register in2, Register in3);
void DsFsmInstrFff(uint32_t instruction, FRegister out, FRegister in1, FRegister in2);
void DsFsmInstrFfff(uint32_t instruction, FRegister in1_out, FRegister in2, FRegister in3);
@@ -1550,12 +1625,15 @@ class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSi
const Branch* GetBranch(uint32_t branch_id) const;
uint32_t GetBranchLocationOrPcRelBase(const MipsAssembler::Branch* branch) const;
uint32_t GetBranchOrPcRelBaseForEncoding(const MipsAssembler::Branch* branch) const;
+ void BindRelativeToPrecedingBranch(MipsLabel* label,
+ uint32_t prev_branch_id_plus_one,
+ uint32_t position);
void EmitLiterals();
void ReserveJumpTableSpace();
void EmitJumpTables();
void PromoteBranches();
- void EmitBranch(Branch* branch);
+ void EmitBranch(uint32_t branch_id);
void EmitBranches();
void PatchCFI(size_t number_of_delayed_adjust_pcs);
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index b72a14e906..a5cd5a7c65 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -259,12 +259,86 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler,
return result;
}
+ void BranchHelper(void (mips::MipsAssembler::*f)(mips::MipsLabel*, bool),
+ const std::string& instr_name,
+ bool has_slot,
+ bool is_bare = false) {
+ __ SetReorder(false);
+ mips::MipsLabel label1, label2;
+ (Base::GetAssembler()->*f)(&label1, is_bare);
+ constexpr size_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label1);
+ (Base::GetAssembler()->*f)(&label2, is_bare);
+ constexpr size_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label2);
+ (Base::GetAssembler()->*f)(&label1, is_bare);
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+ std::string expected =
+ ".set noreorder\n" +
+ instr_name + " 1f\n" +
+ ((is_bare || !has_slot) ? "" : "nop\n") +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n" +
+ instr_name + " 2f\n" +
+ ((is_bare || !has_slot) ? "" : "nop\n") +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ "2:\n" +
+ instr_name + " 1b\n" +
+ ((is_bare || !has_slot) ? "" : "nop\n") +
+ "addu $zero, $zero, $zero\n";
+ DriverStr(expected, instr_name);
+ }
+
+ void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register,
+ mips::MipsLabel*,
+ bool),
+ const std::string& instr_name,
+ bool is_bare = false) {
+ __ SetReorder(false);
+ mips::MipsLabel label;
+ (Base::GetAssembler()->*f)(mips::A0, &label, is_bare);
+ constexpr size_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label);
+ constexpr size_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ (Base::GetAssembler()->*f)(mips::A1, &label, is_bare);
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+ std::string expected =
+ ".set noreorder\n" +
+ instr_name + " $a0, 1f\n" +
+ (is_bare ? "" : "nop\n") +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n" +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ instr_name + " $a1, 1b\n" +
+ (is_bare ? "" : "nop\n") +
+ "addu $zero, $zero, $zero\n";
+ DriverStr(expected, instr_name);
+ }
+
void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
mips::Register,
- mips::MipsLabel*),
- const std::string& instr_name) {
+ mips::MipsLabel*,
+ bool),
+ const std::string& instr_name,
+ bool is_bare = false) {
+ __ SetReorder(false);
mips::MipsLabel label;
- (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
+ (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label, is_bare);
constexpr size_t kAdduCount1 = 63;
for (size_t i = 0; i != kAdduCount1; ++i) {
__ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
@@ -274,17 +348,52 @@ class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler,
for (size_t i = 0; i != kAdduCount2; ++i) {
__ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
}
- (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label);
+ (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label, is_bare);
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
std::string expected =
".set noreorder\n" +
- instr_name + " $a0, $a1, 1f\n"
- "nop\n" +
+ instr_name + " $a0, $a1, 1f\n" +
+ (is_bare ? "" : "nop\n") +
RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
"1:\n" +
RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- instr_name + " $a2, $a3, 1b\n"
- "nop\n";
+ instr_name + " $a2, $a3, 1b\n" +
+ (is_bare ? "" : "nop\n") +
+ "addu $zero, $zero, $zero\n";
+ DriverStr(expected, instr_name);
+ }
+
+ void BranchFpuCondHelper(void (mips::MipsAssembler::*f)(mips::FRegister,
+ mips::MipsLabel*,
+ bool),
+ const std::string& instr_name,
+ bool is_bare = false) {
+ __ SetReorder(false);
+ mips::MipsLabel label;
+ (Base::GetAssembler()->*f)(mips::F0, &label, is_bare);
+ constexpr size_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label);
+ constexpr size_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ (Base::GetAssembler()->*f)(mips::F30, &label, is_bare);
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+ std::string expected =
+ ".set noreorder\n" +
+ instr_name + " $f0, 1f\n" +
+ (is_bare ? "" : "nop\n") +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n" +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ instr_name + " $f30, 1b\n" +
+ (is_bare ? "" : "nop\n") +
+ "addu $zero, $zero, $zero\n";
DriverStr(expected, instr_name);
}
@@ -947,78 +1056,386 @@ TEST_F(AssemblerMIPS32r6Test, StoreQToOffset) {
DriverStr(expected, "StoreQToOffset");
}
-TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLabelAddress) {
+//////////////
+// BRANCHES //
+//////////////
+
+TEST_F(AssemblerMIPS32r6Test, Bc) {
+ BranchHelper(&mips::MipsAssembler::Bc, "Bc", /* has_slot */ false);
+}
+
+TEST_F(AssemblerMIPS32r6Test, Balc) {
+ BranchHelper(&mips::MipsAssembler::Balc, "Balc", /* has_slot */ false);
+}
+
+TEST_F(AssemblerMIPS32r6Test, Beqc) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Beqc, "Beqc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bnec) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bnec, "Bnec");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Beqzc) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Beqzc, "Beqzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bnezc) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bnezc, "Bnezc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bltzc) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bltzc, "Bltzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgezc) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bgezc, "Bgezc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Blezc) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Blezc, "Blezc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgtzc) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bgtzc, "Bgtzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bltc) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltc, "Bltc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgec) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgec, "Bgec");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bltuc) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltuc, "Bltuc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgeuc) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeuc, "Bgeuc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bc1eqz) {
+ BranchFpuCondHelper(&mips::MipsAssembler::Bc1eqz, "Bc1eqz");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bc1nez) {
+ BranchFpuCondHelper(&mips::MipsAssembler::Bc1nez, "Bc1nez");
+}
+
+TEST_F(AssemblerMIPS32r6Test, B) {
+ BranchHelper(&mips::MipsAssembler::B, "Bc", /* has_slot */ false);
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bal) {
+ BranchHelper(&mips::MipsAssembler::Bal, "Balc", /* has_slot */ false);
+}
+
+TEST_F(AssemblerMIPS32r6Test, Beq) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beqc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bne) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bnec");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Beqz) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bnez) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnezc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bltz) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgez) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgezc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Blez) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blezc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgtz) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Blt) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Bltc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bge) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bgec");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bltu) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltuc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Bgeu) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeuc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBc) {
+ BranchHelper(&mips::MipsAssembler::Bc, "Bc", /* has_slot */ false, /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBalc) {
+ BranchHelper(&mips::MipsAssembler::Balc, "Balc", /* has_slot */ false, /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBeqc) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Beqc, "Beqc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBnec) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bnec, "Bnec", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBeqzc) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Beqzc, "Beqzc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBnezc) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bnezc, "Bnezc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBltzc) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bltzc, "Bltzc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgezc) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bgezc, "Bgezc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBlezc) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Blezc, "Blezc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgtzc) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bgtzc, "Bgtzc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBltc) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltc, "Bltc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgec) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgec, "Bgec", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBltuc) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltuc, "Bltuc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgeuc) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeuc, "Bgeuc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBc1eqz) {
+ BranchFpuCondHelper(&mips::MipsAssembler::Bc1eqz, "Bc1eqz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBc1nez) {
+ BranchFpuCondHelper(&mips::MipsAssembler::Bc1nez, "Bc1nez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareB) {
+ BranchHelper(&mips::MipsAssembler::B, "B", /* has_slot */ true, /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBal) {
+ BranchHelper(&mips::MipsAssembler::Bal, "Bal", /* has_slot */ true, /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBeq) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBne) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBeqz) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBnez) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBltz) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgez) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBlez) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgtz) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBlt) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Blt", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBge) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bge", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBltu) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltu", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, BareBgeu) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeu", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS32r6Test, LongBeqc) {
mips::MipsLabel label;
- __ LoadLabelAddress(mips::V0, mips::ZERO, &label);
- constexpr size_t kAdduCount = 0x3FFDE;
- for (size_t i = 0; i != kAdduCount; ++i) {
+ __ Beqc(mips::A0, mips::A1, &label);
+ constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
+ for (uint32_t i = 0; i != kAdduCount1; ++i) {
__ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
}
__ Bind(&label);
+ constexpr uint32_t kAdduCount2 = (1u << 15) + 1;
+ for (uint32_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Beqc(mips::A2, mips::A3, &label);
- std::string expected =
- "lapc $v0, 1f\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "1:\n";
- DriverStr(expected, "LoadFarthestNearLabelAddress");
+ uint32_t offset_forward = 2 + kAdduCount1; // 2: account for auipc and jic.
+ offset_forward <<= 2;
+ offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic.
+
+ uint32_t offset_back = -(kAdduCount2 + 1); // 1: account for bnec.
+ offset_back <<= 2;
+ offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic.
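+
+ // Rationale for the adjustments above: the auipc/jic pair splits a
+ // PC-relative offset into high and low halves, but jic sign-extends its
+ // 16-bit immediate. When bit 15 of the low half is set, the high half must
+ // be pre-incremented to compensate, which is what adding
+ // (offset & 0x8000) << 1 achieves. E.g. for an offset of 0x18000, the
+ // adjusted value 0x28000 gives auipc 0x0002 and jic 0x8000, and
+ // pc + 0x20000 + SignExtend16(0x8000) = pc + 0x18000 as intended.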
+
+ std::ostringstream oss;
+ oss <<
+ ".set noreorder\n"
+ "bnec $a0, $a1, 1f\n"
+ "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+ "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+ "1:\n" <<
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
+ "2:\n" <<
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
+ "bnec $a2, $a3, 3f\n"
+ "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+ "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+ "3:\n";
+ std::string expected = oss.str();
+ DriverStr(expected, "LongBeqc");
}
-TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLabelAddress) {
+TEST_F(AssemblerMIPS32r6Test, LongBeqzc) {
+ constexpr uint32_t kNopCount1 = (1u << 20) + 1;
+ constexpr uint32_t kNopCount2 = (1u << 20) + 1;
+ constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u;
+ ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity);
+ __ GetBuffer()->ExtendCapacity(kRequiredCapacity);
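+ // Extending the buffer in one step (rather than letting ~2M emitted nops
+ // grow it incrementally) is presumably a test-time optimization; the ASSERT
+ // above confirms the initial capacity is small enough for the extension to
+ // actually be exercised.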
mips::MipsLabel label;
- __ LoadLabelAddress(mips::V0, mips::ZERO, &label);
- constexpr size_t kAdduCount = 0x3FFDF;
- for (size_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ __ Beqzc(mips::A0, &label);
+ for (uint32_t i = 0; i != kNopCount1; ++i) {
+ __ Nop();
}
__ Bind(&label);
+ for (uint32_t i = 0; i != kNopCount2; ++i) {
+ __ Nop();
+ }
+ __ Beqzc(mips::A2, &label);
- std::string expected =
- "1:\n"
- "auipc $at, %hi(2f - 1b)\n"
- "addiu $v0, $at, %lo(2f - 1b)\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "2:\n";
- DriverStr(expected, "LoadNearestFarLabelAddress");
-}
+ uint32_t offset_forward = 2 + kNopCount1; // 2: account for auipc and jic.
+ offset_forward <<= 2;
+ offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic.
-TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) {
- mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
- __ LoadLiteral(mips::V0, mips::ZERO, literal);
- constexpr size_t kAdduCount = 0x3FFDE;
- for (size_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ uint32_t offset_back = -(kNopCount2 + 1); // 1: account for bnezc.
+ offset_back <<= 2;
+ offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic.
+
+ // Note that we're using the ".fill" directive to tell the assembler to generate many
+ // NOPs instead of generating them ourselves in the source code. This saves test time.
+ std::ostringstream oss;
+ oss <<
+ ".set noreorder\n"
+ "bnezc $a0, 1f\n"
+ "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+ "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+ "1:\n" <<
+ ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n"
+ "2:\n" <<
+ ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n"
+ "bnezc $a2, 3f\n"
+ "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+ "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+ "3:\n";
+ std::string expected = oss.str();
+ DriverStr(expected, "LongBeqzc");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LongBc) {
+ constexpr uint32_t kNopCount1 = (1u << 25) + 1;
+ constexpr uint32_t kNopCount2 = (1u << 25) + 1;
+ constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u;
+ ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity);
+ __ GetBuffer()->ExtendCapacity(kRequiredCapacity);
+ mips::MipsLabel label1, label2;
+ __ Bc(&label1);
+ for (uint32_t i = 0; i != kNopCount1; ++i) {
+ __ Nop();
+ }
+ __ Bind(&label1);
+ __ Bc(&label2);
+ for (uint32_t i = 0; i != kNopCount2; ++i) {
+ __ Nop();
}
+ __ Bind(&label2);
+ __ Bc(&label1);
- std::string expected =
- "lwpc $v0, 1f\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "1:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadFarthestNearLiteral");
-}
+ uint32_t offset_forward1 = 2 + kNopCount1; // 2: account for auipc and jic.
+ offset_forward1 <<= 2;
+ offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in jic.
-TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) {
- mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
- __ LoadLiteral(mips::V0, mips::ZERO, literal);
- constexpr size_t kAdduCount = 0x3FFDF;
- for (size_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
+ uint32_t offset_forward2 = 2 + kNopCount2; // 2: account for auipc and jic.
+ offset_forward2 <<= 2;
+ offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in jic.
- std::string expected =
+ uint32_t offset_back = -(2 + kNopCount2); // 2: account for auipc and jic.
+ offset_back <<= 2;
+ offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic.
+
+ // Note that we're using the ".fill" directive to tell the assembler to generate many
+ // NOPs instead of generating them ourselves in the source code. This saves a few
+ // minutes of test time.
+ std::ostringstream oss;
+ oss <<
+ ".set noreorder\n"
+ "auipc $at, 0x" << std::hex << High16Bits(offset_forward1) << "\n"
+ "jic $at, 0x" << std::hex << Low16Bits(offset_forward1) << "\n"
+ ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n"
"1:\n"
- "auipc $at, %hi(2f - 1b)\n"
- "lw $v0, %lo(2f - 1b)($at)\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "auipc $at, 0x" << std::hex << High16Bits(offset_forward2) << "\n"
+ "jic $at, 0x" << std::hex << Low16Bits(offset_forward2) << "\n"
+ ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n"
"2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadNearestFarLiteral");
+ "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+ "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n";
+ std::string expected = oss.str();
+ DriverStr(expected, "LongBc");
}
-//////////////
-// BRANCHES //
-//////////////
-
TEST_F(AssemblerMIPS32r6Test, ImpossibleReordering) {
mips::MipsLabel label;
__ SetReorder(true);
@@ -1112,10 +1529,62 @@ TEST_F(AssemblerMIPS32r6Test, SetReorder) {
DriverStr(expected, "SetReorder");
}
+TEST_F(AssemblerMIPS32r6Test, ReorderPatchedInstruction) {
+ __ SetReorder(true);
+ mips::MipsLabel label1, label2;
+ mips::MipsLabel patcher_label1, patcher_label2, patcher_label3, patcher_label4, patcher_label5;
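+ // Each patcher label is bound to a possibly patchable instruction (lw, sw or
+ // addiu with the 0x5678 placeholder); the EXPECT_EQ checks at the end verify
+ // that the labels still point at those instructions after delay-slot
+ // reordering has moved them.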
+ __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label1);
+ __ Bc1eqz(mips::F0, &label1);
+ constexpr uint32_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label1);
+ __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label2);
+ __ Bc1nez(mips::F2, &label2);
+ constexpr uint32_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label2);
+ __ Addiu(mips::V0, mips::A0, 0x5678, &patcher_label3);
+ __ Bc1eqz(mips::F4, &label1);
+ __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label4);
+ __ Jalr(mips::T9);
+ __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label5);
+ __ Bltc(mips::V0, mips::V1, &label2);
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+ std::string expected =
+ ".set noreorder\n"
+ "bc1eqz $f0, 1f\n"
+ "lw $v0, 0x5678($a0)\n" +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n"
+ "bc1nez $f2, 2f\n"
+ "sw $v0, 0x5678($a0)\n" +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ "2:\n"
+ "bc1eqz $f4, 1b\n"
+ "addiu $v0, $a0, 0x5678\n"
+ "jalr $t9\n"
+ "lw $v0, 0x5678($a0)\n"
+ "sw $v0, 0x5678($a0)\n"
+ "bltc $v0, $v1, 2b\n"
+ "nop\n"
+ "addu $zero, $zero, $zero\n";
+ DriverStr(expected, "ReorderPatchedInstruction");
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 1 * 4u);
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + 3) * 4u);
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label3), (kAdduCount1 + kAdduCount2 + 5) * 4u);
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label4), (kAdduCount1 + kAdduCount2 + 7) * 4u);
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label5), (kAdduCount1 + kAdduCount2 + 8) * 4u);
+}
+
TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) {
- mips::MipsLabel label;
+ mips::MipsLabel label, patcher_label1, patcher_label2;
__ SetReorder(true);
- __ Subu(mips::T0, mips::T1, mips::T2);
+ __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label1);
__ Bc1nez(mips::F0, &label);
constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
for (uint32_t i = 0; i != kAdduCount1; ++i) {
@@ -1126,7 +1595,7 @@ TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) {
for (uint32_t i = 0; i != kAdduCount2; ++i) {
__ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
}
- __ Subu(mips::T0, mips::T1, mips::T2);
+ __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label2);
__ Bc1eqz(mips::F0, &label);
uint32_t offset_forward = 2 + kAdduCount1; // 2: account for auipc and jic.
@@ -1140,7 +1609,7 @@ TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) {
std::ostringstream oss;
oss <<
".set noreorder\n"
- "subu $t0, $t1, $t2\n"
+ "addiu $t0, $t1, 0x5678\n"
"bc1eqz $f0, 1f\n"
"auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
"jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
@@ -1148,49 +1617,88 @@ TEST_F(AssemblerMIPS32r6Test, LongBranchReorder) {
RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
"2:\n" <<
RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
- "subu $t0, $t1, $t2\n"
+ "addiu $t0, $t1, 0x5678\n"
"bc1nez $f0, 3f\n"
"auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
"jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
"3:\n";
std::string expected = oss.str();
- DriverStr(expected, "LongBeqc");
+ DriverStr(expected, "LongBranchReorder");
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 0 * 4u);
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + kAdduCount2 + 4) * 4u);
+}
+
+///////////////////////
+// Loading Constants //
+///////////////////////
+
+TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLabelAddress) {
+ mips::MipsLabel label;
+ __ LoadLabelAddress(mips::V0, mips::ZERO, &label);
+ constexpr size_t kAdduCount = 0x3FFDE;
+ for (size_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label);
+
+ std::string expected =
+ "lapc $v0, 1f\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "1:\n";
+ DriverStr(expected, "LoadFarthestNearLabelAddress");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLabelAddress) {
+ mips::MipsLabel label;
+ __ LoadLabelAddress(mips::V0, mips::ZERO, &label);
+ constexpr size_t kAdduCount = 0x3FFDF;
+ for (size_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label);
+
+ std::string expected =
+ "1:\n"
+ "auipc $at, %hi(2f - 1b)\n"
+ "addiu $v0, $at, %lo(2f - 1b)\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "2:\n";
+ DriverStr(expected, "LoadNearestFarLabelAddress");
}
-// TODO: MipsAssembler::Bc
-// MipsAssembler::Jic
-// MipsAssembler::Jialc
-// MipsAssembler::Bltc
-// MipsAssembler::Bltzc
-// MipsAssembler::Bgtzc
-// MipsAssembler::Bgec
-// MipsAssembler::Bgezc
-// MipsAssembler::Blezc
-// MipsAssembler::Bltuc
-// MipsAssembler::Bgeuc
-// MipsAssembler::Beqc
-// MipsAssembler::Bnec
-// MipsAssembler::Beqzc
-// MipsAssembler::Bnezc
-// MipsAssembler::Bc1eqz
-// MipsAssembler::Bc1nez
-// MipsAssembler::Buncond
-// MipsAssembler::Bcond
-// MipsAssembler::Call
-
-// TODO: AssemblerMIPS32r6Test.B
-// AssemblerMIPS32r6Test.Beq
-// AssemblerMIPS32r6Test.Bne
-// AssemblerMIPS32r6Test.Beqz
-// AssemblerMIPS32r6Test.Bnez
-// AssemblerMIPS32r6Test.Bltz
-// AssemblerMIPS32r6Test.Bgez
-// AssemblerMIPS32r6Test.Blez
-// AssemblerMIPS32r6Test.Bgtz
-// AssemblerMIPS32r6Test.Blt
-// AssemblerMIPS32r6Test.Bge
-// AssemblerMIPS32r6Test.Bltu
-// AssemblerMIPS32r6Test.Bgeu
+TEST_F(AssemblerMIPS32r6Test, LoadFarthestNearLiteral) {
+ mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ LoadLiteral(mips::V0, mips::ZERO, literal);
+ constexpr size_t kAdduCount = 0x3FFDE;
+ for (size_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+
+ std::string expected =
+ "lwpc $v0, 1f\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "1:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadFarthestNearLiteral");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LoadNearestFarLiteral) {
+ mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ LoadLiteral(mips::V0, mips::ZERO, literal);
+ constexpr size_t kAdduCount = 0x3FFDF;
+ for (size_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+
+ std::string expected =
+ "1:\n"
+ "auipc $at, %hi(2f - 1b)\n"
+ "lw $v0, %lo(2f - 1b)($at)\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "2:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadNearestFarLiteral");
+}
// MSA instructions.
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index 09175309f9..680c347fef 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -186,11 +186,51 @@ class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler,
return result;
}
+ void BranchHelper(void (mips::MipsAssembler::*f)(mips::MipsLabel*,
+ bool),
+ const std::string& instr_name,
+ bool is_bare = false) {
+ __ SetReorder(false);
+ mips::MipsLabel label1, label2;
+ (Base::GetAssembler()->*f)(&label1, is_bare);
+ constexpr size_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label1);
+ (Base::GetAssembler()->*f)(&label2, is_bare);
+ constexpr size_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label2);
+ (Base::GetAssembler()->*f)(&label1, is_bare);
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+ std::string expected =
+ ".set noreorder\n" +
+ instr_name + " 1f\n" +
+ (is_bare ? "" : "nop\n") +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n" +
+ instr_name + " 2f\n" +
+ (is_bare ? "" : "nop\n") +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ "2:\n" +
+ instr_name + " 1b\n" +
+ (is_bare ? "" : "nop\n") +
+ "addu $zero, $zero, $zero\n";
+ DriverStr(expected, instr_name);
+ }
+
void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register,
- mips::MipsLabel*),
- const std::string& instr_name) {
+ mips::MipsLabel*,
+ bool),
+ const std::string& instr_name,
+ bool is_bare = false) {
+ __ SetReorder(false);
mips::MipsLabel label;
- (Base::GetAssembler()->*f)(mips::A0, &label);
+ (Base::GetAssembler()->*f)(mips::A0, &label, is_bare);
constexpr size_t kAdduCount1 = 63;
for (size_t i = 0; i != kAdduCount1; ++i) {
__ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
@@ -200,26 +240,64 @@ class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler,
for (size_t i = 0; i != kAdduCount2; ++i) {
__ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
}
- (Base::GetAssembler()->*f)(mips::A1, &label);
+ (Base::GetAssembler()->*f)(mips::A1, &label, is_bare);
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
std::string expected =
".set noreorder\n" +
- instr_name + " $a0, 1f\n"
- "nop\n" +
+ instr_name + " $a0, 1f\n" +
+ (is_bare ? "" : "nop\n") +
RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
"1:\n" +
RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- instr_name + " $a1, 1b\n"
- "nop\n";
+ instr_name + " $a1, 1b\n" +
+ (is_bare ? "" : "nop\n") +
+ "addu $zero, $zero, $zero\n";
DriverStr(expected, instr_name);
}
void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
mips::Register,
- mips::MipsLabel*),
- const std::string& instr_name) {
+ mips::MipsLabel*,
+ bool),
+ const std::string& instr_name,
+ bool is_bare = false) {
+ __ SetReorder(false);
+ mips::MipsLabel label;
+ (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label, is_bare);
+ constexpr size_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label);
+ constexpr size_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label, is_bare);
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+ std::string expected =
+ ".set noreorder\n" +
+ instr_name + " $a0, $a1, 1f\n" +
+ (is_bare ? "" : "nop\n") +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n" +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ instr_name + " $a2, $a3, 1b\n" +
+ (is_bare ? "" : "nop\n") +
+ "addu $zero, $zero, $zero\n";
+ DriverStr(expected, instr_name);
+ }
+
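+ // R2 FP branches (bc1f/bc1t) take a condition-code index (0-7) instead of an
+ // FPU register; the expected output names the matching $fccN.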
+ void BranchFpuCondCodeHelper(void (mips::MipsAssembler::*f)(int,
+ mips::MipsLabel*,
+ bool),
+ const std::string& instr_name,
+ bool is_bare = false) {
+ __ SetReorder(false);
mips::MipsLabel label;
- (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
+ (Base::GetAssembler()->*f)(0, &label, is_bare);
constexpr size_t kAdduCount1 = 63;
for (size_t i = 0; i != kAdduCount1; ++i) {
__ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
@@ -229,17 +307,19 @@ class AssemblerMIPSTest : public AssemblerTest<mips::MipsAssembler,
for (size_t i = 0; i != kAdduCount2; ++i) {
__ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
}
- (Base::GetAssembler()->*f)(mips::A2, mips::A3, &label);
+ (Base::GetAssembler()->*f)(7, &label, is_bare);
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
std::string expected =
".set noreorder\n" +
- instr_name + " $a0, $a1, 1f\n"
- "nop\n" +
+ instr_name + " $fcc0, 1f\n" +
+ (is_bare ? "" : "nop\n") +
RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
"1:\n" +
RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- instr_name + " $a2, $a3, 1b\n"
- "nop\n";
+ instr_name + " $fcc7, 1b\n" +
+ (is_bare ? "" : "nop\n") +
+ "addu $zero, $zero, $zero\n";
DriverStr(expected, instr_name);
}
@@ -2072,410 +2152,136 @@ TEST_F(AssemblerMIPSTest, StoreConstToOffset) {
DriverStr(expected, "StoreConstToOffset");
}
+//////////////
+// BRANCHES //
+//////////////
+
TEST_F(AssemblerMIPSTest, B) {
- mips::MipsLabel label1, label2;
- __ B(&label1);
- constexpr size_t kAdduCount1 = 63;
- for (size_t i = 0; i != kAdduCount1; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bind(&label1);
- __ B(&label2);
- constexpr size_t kAdduCount2 = 64;
- for (size_t i = 0; i != kAdduCount2; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bind(&label2);
- __ B(&label1);
+ BranchHelper(&mips::MipsAssembler::B, "B");
+}
- std::string expected =
- ".set noreorder\n"
- "b 1f\n"
- "nop\n" +
- RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
- "1:\n"
- "b 2f\n"
- "nop\n" +
- RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- "2:\n"
- "b 1b\n"
- "nop\n";
- DriverStr(expected, "B");
+TEST_F(AssemblerMIPSTest, Bal) {
+ BranchHelper(&mips::MipsAssembler::Bal, "Bal");
}
TEST_F(AssemblerMIPSTest, Beq) {
- __ SetReorder(false);
BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq");
}
TEST_F(AssemblerMIPSTest, Bne) {
- __ SetReorder(false);
BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne");
}
TEST_F(AssemblerMIPSTest, Beqz) {
- __ SetReorder(false);
- mips::MipsLabel label;
- __ Beqz(mips::A0, &label);
- constexpr size_t kAdduCount1 = 63;
- for (size_t i = 0; i != kAdduCount1; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bind(&label);
- constexpr size_t kAdduCount2 = 64;
- for (size_t i = 0; i != kAdduCount2; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Beqz(mips::A1, &label);
-
- std::string expected =
- ".set noreorder\n"
- "beq $zero, $a0, 1f\n"
- "nop\n" +
- RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
- "1:\n" +
- RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- "beq $zero, $a1, 1b\n"
- "nop\n";
- DriverStr(expected, "Beqz");
+ BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqz");
}
TEST_F(AssemblerMIPSTest, Bnez) {
- __ SetReorder(false);
- mips::MipsLabel label;
- __ Bnez(mips::A0, &label);
- constexpr size_t kAdduCount1 = 63;
- for (size_t i = 0; i != kAdduCount1; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bind(&label);
- constexpr size_t kAdduCount2 = 64;
- for (size_t i = 0; i != kAdduCount2; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bnez(mips::A1, &label);
-
- std::string expected =
- ".set noreorder\n"
- "bne $zero, $a0, 1f\n"
- "nop\n" +
- RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
- "1:\n" +
- RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- "bne $zero, $a1, 1b\n"
- "nop\n";
- DriverStr(expected, "Bnez");
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnez");
}
TEST_F(AssemblerMIPSTest, Bltz) {
- __ SetReorder(false);
BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz");
}
TEST_F(AssemblerMIPSTest, Bgez) {
- __ SetReorder(false);
BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez");
}
TEST_F(AssemblerMIPSTest, Blez) {
- __ SetReorder(false);
BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez");
}
TEST_F(AssemblerMIPSTest, Bgtz) {
- __ SetReorder(false);
BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz");
}
TEST_F(AssemblerMIPSTest, Blt) {
- __ SetReorder(false);
- mips::MipsLabel label;
- __ Blt(mips::A0, mips::A1, &label);
- constexpr size_t kAdduCount1 = 63;
- for (size_t i = 0; i != kAdduCount1; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bind(&label);
- constexpr size_t kAdduCount2 = 64;
- for (size_t i = 0; i != kAdduCount2; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Blt(mips::A2, mips::A3, &label);
-
- std::string expected =
- ".set noreorder\n"
- "slt $at, $a0, $a1\n"
- "bne $zero, $at, 1f\n"
- "nop\n" +
- RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
- "1:\n" +
- RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- "slt $at, $a2, $a3\n"
- "bne $zero, $at, 1b\n"
- "nop\n";
- DriverStr(expected, "Blt");
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Blt");
}
TEST_F(AssemblerMIPSTest, Bge) {
- __ SetReorder(false);
- mips::MipsLabel label;
- __ Bge(mips::A0, mips::A1, &label);
- constexpr size_t kAdduCount1 = 63;
- for (size_t i = 0; i != kAdduCount1; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bind(&label);
- constexpr size_t kAdduCount2 = 64;
- for (size_t i = 0; i != kAdduCount2; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bge(mips::A2, mips::A3, &label);
-
- std::string expected =
- ".set noreorder\n"
- "slt $at, $a0, $a1\n"
- "beq $zero, $at, 1f\n"
- "nop\n" +
- RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
- "1:\n" +
- RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- "slt $at, $a2, $a3\n"
- "beq $zero, $at, 1b\n"
- "nop\n";
- DriverStr(expected, "Bge");
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bge");
}
TEST_F(AssemblerMIPSTest, Bltu) {
- __ SetReorder(false);
- mips::MipsLabel label;
- __ Bltu(mips::A0, mips::A1, &label);
- constexpr size_t kAdduCount1 = 63;
- for (size_t i = 0; i != kAdduCount1; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bind(&label);
- constexpr size_t kAdduCount2 = 64;
- for (size_t i = 0; i != kAdduCount2; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bltu(mips::A2, mips::A3, &label);
-
- std::string expected =
- ".set noreorder\n"
- "sltu $at, $a0, $a1\n"
- "bne $zero, $at, 1f\n"
- "nop\n" +
- RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
- "1:\n" +
- RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- "sltu $at, $a2, $a3\n"
- "bne $zero, $at, 1b\n"
- "nop\n";
- DriverStr(expected, "Bltu");
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltu");
}
TEST_F(AssemblerMIPSTest, Bgeu) {
- __ SetReorder(false);
- mips::MipsLabel label;
- __ Bgeu(mips::A0, mips::A1, &label);
- constexpr size_t kAdduCount1 = 63;
- for (size_t i = 0; i != kAdduCount1; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bind(&label);
- constexpr size_t kAdduCount2 = 64;
- for (size_t i = 0; i != kAdduCount2; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bgeu(mips::A2, mips::A3, &label);
-
- std::string expected =
- ".set noreorder\n"
- "sltu $at, $a0, $a1\n"
- "beq $zero, $at, 1f\n"
- "nop\n" +
- RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
- "1:\n" +
- RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- "sltu $at, $a2, $a3\n"
- "beq $zero, $at, 1b\n"
- "nop\n";
- DriverStr(expected, "Bgeu");
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeu");
}
TEST_F(AssemblerMIPSTest, Bc1f) {
- __ SetReorder(false);
- mips::MipsLabel label;
- __ Bc1f(0, &label);
- constexpr size_t kAdduCount1 = 63;
- for (size_t i = 0; i != kAdduCount1; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bind(&label);
- constexpr size_t kAdduCount2 = 64;
- for (size_t i = 0; i != kAdduCount2; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bc1f(7, &label);
-
- std::string expected =
- ".set noreorder\n"
- "bc1f $fcc0, 1f\n"
- "nop\n" +
- RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
- "1:\n" +
- RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- "bc1f $fcc7, 1b\n"
- "nop\n";
- DriverStr(expected, "Bc1f");
+ BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1f, "Bc1f");
}
TEST_F(AssemblerMIPSTest, Bc1t) {
- __ SetReorder(false);
- mips::MipsLabel label;
- __ Bc1t(0, &label);
- constexpr size_t kAdduCount1 = 63;
- for (size_t i = 0; i != kAdduCount1; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bind(&label);
- constexpr size_t kAdduCount2 = 64;
- for (size_t i = 0; i != kAdduCount2; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bc1t(7, &label);
+ BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1t, "Bc1t");
+}
- std::string expected =
- ".set noreorder\n"
- "bc1t $fcc0, 1f\n"
- "nop\n" +
- RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
- "1:\n" +
- RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- "bc1t $fcc7, 1b\n"
- "nop\n";
- DriverStr(expected, "Bc1t");
+TEST_F(AssemblerMIPSTest, BareB) {
+ BranchHelper(&mips::MipsAssembler::B, "B", /* is_bare */ true);
}
-///////////////////////
-// Loading Constants //
-///////////////////////
+TEST_F(AssemblerMIPSTest, BareBal) {
+ BranchHelper(&mips::MipsAssembler::Bal, "Bal", /* is_bare */ true);
+}
-TEST_F(AssemblerMIPSTest, LoadConst32) {
- // IsUint<16>(value)
- __ LoadConst32(mips::V0, 0);
- __ LoadConst32(mips::V0, 65535);
- // IsInt<16>(value)
- __ LoadConst32(mips::V0, -1);
- __ LoadConst32(mips::V0, -32768);
- // Everything else
- __ LoadConst32(mips::V0, 65536);
- __ LoadConst32(mips::V0, 65537);
- __ LoadConst32(mips::V0, 2147483647);
- __ LoadConst32(mips::V0, -32769);
- __ LoadConst32(mips::V0, -65536);
- __ LoadConst32(mips::V0, -65537);
- __ LoadConst32(mips::V0, -2147483647);
- __ LoadConst32(mips::V0, -2147483648);
+TEST_F(AssemblerMIPSTest, BareBeq) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Beq, "Beq", /* is_bare */ true);
+}
- const char* expected =
- // IsUint<16>(value)
- "ori $v0, $zero, 0\n" // __ LoadConst32(mips::V0, 0);
- "ori $v0, $zero, 65535\n" // __ LoadConst32(mips::V0, 65535);
- // IsInt<16>(value)
- "addiu $v0, $zero, -1\n" // __ LoadConst32(mips::V0, -1);
- "addiu $v0, $zero, -32768\n" // __ LoadConst32(mips::V0, -32768);
- // Everything else
- "lui $v0, 1\n" // __ LoadConst32(mips::V0, 65536);
- "lui $v0, 1\n" // __ LoadConst32(mips::V0, 65537);
- "ori $v0, 1\n" // "
- "lui $v0, 32767\n" // __ LoadConst32(mips::V0, 2147483647);
- "ori $v0, 65535\n" // "
- "lui $v0, 65535\n" // __ LoadConst32(mips::V0, -32769);
- "ori $v0, 32767\n" // "
- "lui $v0, 65535\n" // __ LoadConst32(mips::V0, -65536);
- "lui $v0, 65534\n" // __ LoadConst32(mips::V0, -65537);
- "ori $v0, 65535\n" // "
- "lui $v0, 32768\n" // __ LoadConst32(mips::V0, -2147483647);
- "ori $v0, 1\n" // "
- "lui $v0, 32768\n"; // __ LoadConst32(mips::V0, -2147483648);
- DriverStr(expected, "LoadConst32");
+TEST_F(AssemblerMIPSTest, BareBne) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bne, "Bne", /* is_bare */ true);
}
-TEST_F(AssemblerMIPSTest, LoadFarthestNearLabelAddress) {
- mips::MipsLabel label;
- __ BindPcRelBaseLabel();
- __ LoadLabelAddress(mips::V0, mips::V1, &label);
- constexpr size_t kAddiuCount = 0x1FDE;
- for (size_t i = 0; i != kAddiuCount; ++i) {
- __ Addiu(mips::A0, mips::A1, 0);
- }
- __ Bind(&label);
+TEST_F(AssemblerMIPSTest, BareBeqz) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Beqz, "Beqz", /* is_bare */ true);
+}
- std::string expected =
- "1:\n"
- "addiu $v0, $v1, %lo(2f - 1b)\n" +
- RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
- "2:\n";
- DriverStr(expected, "LoadFarthestNearLabelAddress");
+TEST_F(AssemblerMIPSTest, BareBnez) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bnez, "Bnez", /* is_bare */ true);
}
-TEST_F(AssemblerMIPSTest, LoadNearestFarLabelAddress) {
- mips::MipsLabel label;
- __ BindPcRelBaseLabel();
- __ LoadLabelAddress(mips::V0, mips::V1, &label);
- constexpr size_t kAdduCount = 0x1FDF;
- for (size_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
- __ Bind(&label);
+TEST_F(AssemblerMIPSTest, BareBltz) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bltz, "Bltz", /* is_bare */ true);
+}
- std::string expected =
- "1:\n"
- "lui $at, %hi(2f - 1b)\n"
- "ori $at, $at, %lo(2f - 1b)\n"
- "addu $v0, $at, $v1\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "2:\n";
- DriverStr(expected, "LoadNearestFarLabelAddress");
+TEST_F(AssemblerMIPSTest, BareBgez) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bgez, "Bgez", /* is_bare */ true);
}
-TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) {
- mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
- __ BindPcRelBaseLabel();
- __ LoadLiteral(mips::V0, mips::V1, literal);
- constexpr size_t kAddiuCount = 0x1FDE;
- for (size_t i = 0; i != kAddiuCount; ++i) {
- __ Addiu(mips::A0, mips::A1, 0);
- }
+TEST_F(AssemblerMIPSTest, BareBlez) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Blez, "Blez", /* is_bare */ true);
+}
- std::string expected =
- "1:\n"
- "lw $v0, %lo(2f - 1b)($v1)\n" +
- RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadFarthestNearLiteral");
+TEST_F(AssemblerMIPSTest, BareBgtz) {
+ BranchCondOneRegHelper(&mips::MipsAssembler::Bgtz, "Bgtz", /* is_bare */ true);
}
-TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) {
- mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
- __ BindPcRelBaseLabel();
- __ LoadLiteral(mips::V0, mips::V1, literal);
- constexpr size_t kAdduCount = 0x1FDF;
- for (size_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
- }
+TEST_F(AssemblerMIPSTest, BareBlt) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Blt, "Blt", /* is_bare */ true);
+}
- std::string expected =
- "1:\n"
- "lui $at, %hi(2f - 1b)\n"
- "addu $at, $at, $v1\n"
- "lw $v0, %lo(2f - 1b)($at)\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadNearestFarLiteral");
+TEST_F(AssemblerMIPSTest, BareBge) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bge, "Bge", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBltu) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bltu, "Bltu", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBgeu) {
+ BranchCondTwoRegsHelper(&mips::MipsAssembler::Bgeu, "Bgeu", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBc1f) {
+ BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1f, "Bc1f", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPSTest, BareBc1t) {
+ BranchFpuCondCodeHelper(&mips::MipsAssembler::Bc1t, "Bc1t", /* is_bare */ true);
}
TEST_F(AssemblerMIPSTest, ImpossibleReordering) {
@@ -2554,7 +2360,7 @@ TEST_F(AssemblerMIPSTest, ImpossibleReordering) {
"nop\n"
"addu $t0, $t1, $t2\n"
- "beq $zero, $t0, 1b\n"
+ "beqz $t0, 1b\n"
"nop\n"
"or $t1, $t2, $t3\n"
@@ -2563,17 +2369,17 @@ TEST_F(AssemblerMIPSTest, ImpossibleReordering) {
"and $t0, $t1, $t2\n"
"slt $at, $t1, $t0\n"
- "bne $zero, $at, 1b\n"
+ "bnez $at, 1b\n"
"nop\n"
"xor $at, $t0, $t1\n"
"slt $at, $t1, $t0\n"
- "beq $zero, $at, 1b\n"
+ "beqz $at, 1b\n"
"nop\n"
"subu $t0, $t1, $at\n"
"sltu $at, $t1, $t0\n"
- "bne $zero, $at, 1b\n"
+ "bnez $at, 1b\n"
"nop\n"
"c.olt.s $fcc1, $f2, $f4\n"
@@ -2606,11 +2412,11 @@ TEST_F(AssemblerMIPSTest, ImpossibleReordering) {
"2:\n"
- "bne $zero, $t0, 2b\n"
+ "bnez $t0, 2b\n"
"nop\n"
"sltu $at, $t1, $t0\n"
- "beq $zero, $at, 2b\n"
+ "beqz $at, 2b\n"
"nop\n"
"bc1f $fcc2, 2b\n"
@@ -2666,22 +2472,22 @@ TEST_F(AssemblerMIPSTest, Reordering) {
".set noreorder\n"
"1:\n"
- "beq $zero, $t1, 1b\n"
+ "beqz $t1, 1b\n"
"addu $t0, $t1, $t2\n"
"bne $t2, $t3, 1b\n"
"or $t1, $t2, $t3\n"
"slt $at, $t1, $t2\n"
- "bne $zero, $at, 1b\n"
+ "bnez $at, 1b\n"
"and $t0, $t1, $t2\n"
"slt $at, $t1, $t0\n"
- "beq $zero, $at, 1b\n"
+ "beqz $at, 1b\n"
"xor $t2, $t0, $t1\n"
"sltu $at, $t1, $t0\n"
- "bne $zero, $at, 1b\n"
+ "bnez $at, 1b\n"
"subu $t2, $t1, $t0\n"
"bc1t $fcc1, 1b\n"
@@ -2700,6 +2506,7 @@ TEST_F(AssemblerMIPSTest, Reordering) {
TEST_F(AssemblerMIPSTest, AbsorbTargetInstruction) {
mips::MipsLabel label1, label2, label3, label4, label5, label6;
+ mips::MipsLabel label7, label8, label9, label10, label11, label12, label13;
__ SetReorder(true);
__ B(&label1);
@@ -2723,6 +2530,41 @@ TEST_F(AssemblerMIPSTest, AbsorbTargetInstruction) {
__ Bind(&label6);
__ CodePosition(); // Even across Bind(), CodePosition() prevents absorbing the ADDU above.
+ __ Nop();
+ __ B(&label7);
+ __ Bind(&label7);
+ __ Lw(mips::V0, mips::A0, 0x5678); // Possibly patchable instruction, not absorbed.
+
+ __ Nop();
+ __ B(&label8);
+ __ Bind(&label8);
+ __ Sw(mips::V0, mips::A0, 0x5678); // Possibly patchable instruction, not absorbed.
+
+ __ Nop();
+ __ B(&label9);
+ __ Bind(&label9);
+ __ Addiu(mips::V0, mips::A0, 0x5678); // Possibly patchable instruction, not absorbed.
+
+ __ Nop();
+ __ B(&label10);
+ __ Bind(&label10);
+ __ Lw(mips::V0, mips::A0, 0x5680); // Immediate isn't 0x5678, absorbed.
+
+ __ Nop();
+ __ B(&label11);
+ __ Bind(&label11);
+ __ Sw(mips::V0, mips::A0, 0x5680); // Immediate isn't 0x5678, absorbed.
+
+ __ Nop();
+ __ B(&label12);
+ __ Bind(&label12);
+ __ Addiu(mips::V0, mips::A0, 0x5680); // Immediate isn't 0x5678, absorbed.
+
+ __ Nop();
+ __ B(&label13);
+ __ Bind(&label13);
+ __ Andi(mips::V0, mips::A0, 0x5678); // Not one of the patchable instructions, absorbed.
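+
+ // Summary of the absorption rule exercised above: a branch may pull its
+ // target instruction into the delay slot only when duplicating it is safe.
+ // Possibly patchable instructions (lw/sw/addiu with the 0x5678 placeholder)
+ // must keep a unique location for the patcher, so they are never absorbed;
+ // the same opcodes with other immediates, or other opcodes entirely, are.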
+
std::string expected =
".set noreorder\n"
"b 1f\n"
@@ -2744,7 +2586,49 @@ TEST_F(AssemblerMIPSTest, AbsorbTargetInstruction) {
"b 5f\n"
"nop\n"
"5:\n"
- "addu $t0, $t1, $t2\n";
+ "addu $t0, $t1, $t2\n"
+
+ "nop\n"
+ "b 7f\n"
+ "nop\n"
+ "7:\n"
+ "lw $v0, 0x5678($a0)\n"
+
+ "nop\n"
+ "b 8f\n"
+ "nop\n"
+ "8:\n"
+ "sw $v0, 0x5678($a0)\n"
+
+ "nop\n"
+ "b 9f\n"
+ "nop\n"
+ "9:\n"
+ "addiu $v0, $a0, 0x5678\n"
+
+ "nop\n"
+ "b 10f\n"
+ "lw $v0, 0x5680($a0)\n"
+ "lw $v0, 0x5680($a0)\n"
+ "10:\n"
+
+ "nop\n"
+ "b 11f\n"
+ "sw $v0, 0x5680($a0)\n"
+ "sw $v0, 0x5680($a0)\n"
+ "11:\n"
+
+ "nop\n"
+ "b 12f\n"
+ "addiu $v0, $a0, 0x5680\n"
+ "addiu $v0, $a0, 0x5680\n"
+ "12:\n"
+
+ "nop\n"
+ "b 13f\n"
+ "andi $v0, $a0, 0x5678\n"
+ "andi $v0, $a0, 0x5678\n"
+ "13:\n";
DriverStr(expected, "AbsorbTargetInstruction");
}
@@ -2831,10 +2715,62 @@ TEST_F(AssemblerMIPSTest, SetReorder) {
DriverStr(expected, "SetReorder");
}
+TEST_F(AssemblerMIPSTest, ReorderPatchedInstruction) {
+ __ SetReorder(true);
+ mips::MipsLabel label1, label2;
+ mips::MipsLabel patcher_label1, patcher_label2, patcher_label3, patcher_label4, patcher_label5;
+ __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label1);
+ __ Beq(mips::A0, mips::A1, &label1);
+ constexpr uint32_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label1);
+ __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label2);
+ __ Bltz(mips::V1, &label2);
+ constexpr uint32_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label2);
+ __ Addiu(mips::V0, mips::A0, 0x5678, &patcher_label3);
+ __ B(&label1);
+ __ Lw(mips::V0, mips::A0, 0x5678, &patcher_label4);
+ __ Jalr(mips::T9);
+ __ Sw(mips::V0, mips::A0, 0x5678, &patcher_label5);
+ __ Blt(mips::V0, mips::V1, &label2);
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+
+ std::string expected =
+ ".set noreorder\n"
+ "beq $a0, $a1, 1f\n"
+ "lw $v0, 0x5678($a0)\n" +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n"
+ "bltz $v1, 2f\n"
+ "sw $v0, 0x5678($a0)\n" +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ "2:\n"
+ "b 1b\n"
+ "addiu $v0, $a0, 0x5678\n"
+ "jalr $t9\n"
+ "lw $v0, 0x5678($a0)\n"
+ "slt $at, $v0, $v1\n"
+ "bnez $at, 2b\n"
+ "sw $v0, 0x5678($a0)\n"
+ "addu $zero, $zero, $zero\n";
+ DriverStr(expected, "ReorderPatchedInstruction");
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 1 * 4u);
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + 3) * 4u);
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label3), (kAdduCount1 + kAdduCount2 + 5) * 4u);
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label4), (kAdduCount1 + kAdduCount2 + 7) * 4u);
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label5), (kAdduCount1 + kAdduCount2 + 10) * 4u);
+}
+
TEST_F(AssemblerMIPSTest, LongBranchReorder) {
- mips::MipsLabel label;
+ mips::MipsLabel label, patcher_label1, patcher_label2;
__ SetReorder(true);
- __ Subu(mips::T0, mips::T1, mips::T2);
+ __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label1);
__ B(&label);
constexpr uint32_t kAdduCount1 = (1u << 15) + 1;
for (size_t i = 0; i != kAdduCount1; ++i) {
@@ -2845,7 +2781,7 @@ TEST_F(AssemblerMIPSTest, LongBranchReorder) {
for (size_t i = 0; i != kAdduCount2; ++i) {
__ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
}
- __ Subu(mips::T0, mips::T1, mips::T2);
+ __ Addiu(mips::T0, mips::T1, 0x5678, &patcher_label2);
__ B(&label);
// Account for 5 extra instructions: ori, addu, lw, jalr, addiu.
@@ -2856,7 +2792,7 @@ TEST_F(AssemblerMIPSTest, LongBranchReorder) {
std::ostringstream oss;
oss <<
".set noreorder\n"
- "subu $t0, $t1, $t2\n"
+ "addiu $t0, $t1, 0x5678\n"
"addiu $sp, $sp, -4\n"
"sw $ra, 0($sp)\n"
"bltzal $zero, .+4\n"
@@ -2868,7 +2804,7 @@ TEST_F(AssemblerMIPSTest, LongBranchReorder) {
"addiu $sp, $sp, 4\n" <<
RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") <<
RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") <<
- "subu $t0, $t1, $t2\n"
+ "addiu $t0, $t1, 0x5678\n"
"addiu $sp, $sp, -4\n"
"sw $ra, 0($sp)\n"
"bltzal $zero, .+4\n"
@@ -2880,6 +2816,129 @@ TEST_F(AssemblerMIPSTest, LongBranchReorder) {
"addiu $sp, $sp, 4\n";
std::string expected = oss.str();
DriverStr(expected, "LongBranchReorder");
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label1), 0 * 4u);
+ EXPECT_EQ(__ GetLabelLocation(&patcher_label2), (kAdduCount1 + kAdduCount2 + 10) * 4u);
+}
+
+///////////////////////
+// Loading Constants //
+///////////////////////
+
+TEST_F(AssemblerMIPSTest, LoadConst32) {
+ // IsUint<16>(value)
+ __ LoadConst32(mips::V0, 0);
+ __ LoadConst32(mips::V0, 65535);
+ // IsInt<16>(value)
+ __ LoadConst32(mips::V0, -1);
+ __ LoadConst32(mips::V0, -32768);
+ // Everything else
+ __ LoadConst32(mips::V0, 65536);
+ __ LoadConst32(mips::V0, 65537);
+ __ LoadConst32(mips::V0, 2147483647);
+ __ LoadConst32(mips::V0, -32769);
+ __ LoadConst32(mips::V0, -65536);
+ __ LoadConst32(mips::V0, -65537);
+ __ LoadConst32(mips::V0, -2147483647);
+ __ LoadConst32(mips::V0, -2147483648);
+
+ const char* expected =
+ // IsUint<16>(value)
+ "ori $v0, $zero, 0\n" // __ LoadConst32(mips::V0, 0);
+ "ori $v0, $zero, 65535\n" // __ LoadConst32(mips::V0, 65535);
+ // IsInt<16>(value)
+ "addiu $v0, $zero, -1\n" // __ LoadConst32(mips::V0, -1);
+ "addiu $v0, $zero, -32768\n" // __ LoadConst32(mips::V0, -32768);
+ // Everything else
+ "lui $v0, 1\n" // __ LoadConst32(mips::V0, 65536);
+ "lui $v0, 1\n" // __ LoadConst32(mips::V0, 65537);
+ "ori $v0, 1\n" // "
+ "lui $v0, 32767\n" // __ LoadConst32(mips::V0, 2147483647);
+ "ori $v0, 65535\n" // "
+ "lui $v0, 65535\n" // __ LoadConst32(mips::V0, -32769);
+ "ori $v0, 32767\n" // "
+ "lui $v0, 65535\n" // __ LoadConst32(mips::V0, -65536);
+ "lui $v0, 65534\n" // __ LoadConst32(mips::V0, -65537);
+ "ori $v0, 65535\n" // "
+ "lui $v0, 32768\n" // __ LoadConst32(mips::V0, -2147483647);
+ "ori $v0, 1\n" // "
+ "lui $v0, 32768\n"; // __ LoadConst32(mips::V0, -2147483648);
+ DriverStr(expected, "LoadConst32");
+}
+
+TEST_F(AssemblerMIPSTest, LoadFarthestNearLabelAddress) {
+ mips::MipsLabel label;
+ __ BindPcRelBaseLabel();
+ __ LoadLabelAddress(mips::V0, mips::V1, &label);
+ constexpr size_t kAddiuCount = 0x1FDE;
+ for (size_t i = 0; i != kAddiuCount; ++i) {
+ __ Addiu(mips::A0, mips::A1, 0);
+ }
+ __ Bind(&label);
+
+ std::string expected =
+ "1:\n"
+ "addiu $v0, $v1, %lo(2f - 1b)\n" +
+ RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
+ "2:\n";
+ DriverStr(expected, "LoadFarthestNearLabelAddress");
+}
+
+TEST_F(AssemblerMIPSTest, LoadNearestFarLabelAddress) {
+ mips::MipsLabel label;
+ __ BindPcRelBaseLabel();
+ __ LoadLabelAddress(mips::V0, mips::V1, &label);
+ constexpr size_t kAdduCount = 0x1FDF;
+ for (size_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+ __ Bind(&label);
+
+ std::string expected =
+ "1:\n"
+ "lui $at, %hi(2f - 1b)\n"
+ "ori $at, $at, %lo(2f - 1b)\n"
+ "addu $v0, $at, $v1\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "2:\n";
+ DriverStr(expected, "LoadNearestFarLabelAddress");
+}
+
+TEST_F(AssemblerMIPSTest, LoadFarthestNearLiteral) {
+ mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ BindPcRelBaseLabel();
+ __ LoadLiteral(mips::V0, mips::V1, literal);
+ constexpr size_t kAddiuCount = 0x1FDE;
+ for (size_t i = 0; i != kAddiuCount; ++i) {
+ __ Addiu(mips::A0, mips::A1, 0);
+ }
+
+ std::string expected =
+ "1:\n"
+ "lw $v0, %lo(2f - 1b)($v1)\n" +
+ RepeatInsn(kAddiuCount, "addiu $a0, $a1, %hi(2f - 1b)\n") +
+ "2:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadFarthestNearLiteral");
+}
+
+TEST_F(AssemblerMIPSTest, LoadNearestFarLiteral) {
+ mips::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ BindPcRelBaseLabel();
+ __ LoadLiteral(mips::V0, mips::V1, literal);
+ constexpr size_t kAdduCount = 0x1FDF;
+ for (size_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips::ZERO, mips::ZERO, mips::ZERO);
+ }
+
+ std::string expected =
+ "1:\n"
+ "lui $at, %hi(2f - 1b)\n"
+ "addu $at, $at, $v1\n"
+ "lw $v0, %lo(2f - 1b)($at)\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "2:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadNearestFarLiteral");
}
#undef __
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 7a1beb656b..183b5e507b 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -795,14 +795,42 @@ void Mips64Assembler::Bc1nez(FpuRegister ft, uint16_t imm16) {
EmitFI(0x11, 0xD, ft, imm16);
}
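+
+// Classic (pre-R6) branch encodings: opcode 0x4 for beq, 0x5 for bne, 0x6 for
+// blez, 0x7 for bgtz, and the REGIMM opcode 0x1 with rt = 0 for bltz and
+// rt = 1 for bgez.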
+void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
+ EmitI(0x4, rs, rt, imm16);
+}
+
+void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16) {
+ EmitI(0x5, rs, rt, imm16);
+}
+
void Mips64Assembler::Beqz(GpuRegister rt, uint16_t imm16) {
- EmitI(0x4, ZERO, rt, imm16);
+ Beq(rt, ZERO, imm16);
+}
+
+void Mips64Assembler::Bnez(GpuRegister rt, uint16_t imm16) {
+ Bne(rt, ZERO, imm16);
+}
+
+void Mips64Assembler::Bltz(GpuRegister rt, uint16_t imm16) {
+ EmitI(0x1, rt, static_cast<GpuRegister>(0), imm16);
}
-void Mips64Assembler::EmitBcondc(BranchCondition cond,
- GpuRegister rs,
- GpuRegister rt,
- uint32_t imm16_21) {
+void Mips64Assembler::Bgez(GpuRegister rt, uint16_t imm16) {
+ EmitI(0x1, rt, static_cast<GpuRegister>(0x1), imm16);
+}
+
+void Mips64Assembler::Blez(GpuRegister rt, uint16_t imm16) {
+ EmitI(0x6, rt, static_cast<GpuRegister>(0), imm16);
+}
+
+void Mips64Assembler::Bgtz(GpuRegister rt, uint16_t imm16) {
+ EmitI(0x7, rt, static_cast<GpuRegister>(0), imm16);
+}
+
+void Mips64Assembler::EmitBcondR6(BranchCondition cond,
+ GpuRegister rs,
+ GpuRegister rt,
+ uint32_t imm16_21) {
switch (cond) {
case kCondLT:
Bltc(rs, rt, imm16_21);
@@ -866,6 +894,55 @@ void Mips64Assembler::EmitBcondc(BranchCondition cond,
}
}
+void Mips64Assembler::EmitBcondR2(BranchCondition cond,
+ GpuRegister rs,
+ GpuRegister rt,
+ uint16_t imm16) {
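+ // Only conditions with a single-instruction R2 encoding are supported here;
+ // e.g. kCondLT would need an slt followed by a branch, so it is rejected
+ // below.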
+ switch (cond) {
+ case kCondLTZ:
+ CHECK_EQ(rt, ZERO);
+ Bltz(rs, imm16);
+ break;
+ case kCondGEZ:
+ CHECK_EQ(rt, ZERO);
+ Bgez(rs, imm16);
+ break;
+ case kCondLEZ:
+ CHECK_EQ(rt, ZERO);
+ Blez(rs, imm16);
+ break;
+ case kCondGTZ:
+ CHECK_EQ(rt, ZERO);
+ Bgtz(rs, imm16);
+ break;
+ case kCondEQ:
+ Beq(rs, rt, imm16);
+ break;
+ case kCondNE:
+ Bne(rs, rt, imm16);
+ break;
+ case kCondEQZ:
+ CHECK_EQ(rt, ZERO);
+ Beqz(rs, imm16);
+ break;
+ case kCondNEZ:
+ CHECK_EQ(rt, ZERO);
+ Bnez(rs, imm16);
+ break;
+ case kCondF:
+ case kCondT:
+ case kCondLT:
+ case kCondGE:
+ case kCondLE:
+ case kCondGT:
+ case kCondLTU:
+ case kCondGEU:
+ case kUncond:
+ LOG(FATAL) << "Unexpected branch condition " << cond;
+ UNREACHABLE();
+ }
+}
+
void Mips64Assembler::AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
EmitFR(0x11, 0x10, ft, fs, fd, 0x0);
}
@@ -1002,6 +1079,22 @@ void Mips64Assembler::SelD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
EmitFR(0x11, 0x11, ft, fs, fd, 0x10);
}
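+
+// R6 FP conditional selects: COP1 with fmt 0x10 (single) or 0x11 (double) and
+// function field 0x14 for seleqz.fmt / 0x17 for selnez.fmt.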
+void Mips64Assembler::SeleqzS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x10, ft, fs, fd, 0x14);
+}
+
+void Mips64Assembler::SeleqzD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x11, ft, fs, fd, 0x14);
+}
+
+void Mips64Assembler::SelnezS(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x10, ft, fs, fd, 0x17);
+}
+
+void Mips64Assembler::SelnezD(FpuRegister fd, FpuRegister fs, FpuRegister ft) {
+ EmitFR(0x11, 0x11, ft, fs, fd, 0x17);
+}
+
void Mips64Assembler::RintS(FpuRegister fd, FpuRegister fs) {
EmitFR(0x11, 0x10, static_cast<FpuRegister>(0), fs, fd, 0x1a);
}
@@ -2013,37 +2106,67 @@ void Mips64Assembler::Branch::InitShortOrLong(Mips64Assembler::Branch::OffsetBit
type_ = (offset_size <= branch_info_[short_type].offset_size) ? short_type : long_type;
}
-void Mips64Assembler::Branch::InitializeType(Type initial_type) {
- OffsetBits offset_size = GetOffsetSizeNeeded(location_, target_);
- switch (initial_type) {
- case kLabel:
- case kLiteral:
- case kLiteralUnsigned:
- case kLiteralLong:
- CHECK(!IsResolved());
- type_ = initial_type;
- break;
- case kCall:
- InitShortOrLong(offset_size, kCall, kLongCall);
- break;
- case kCondBranch:
- switch (condition_) {
- case kUncond:
- InitShortOrLong(offset_size, kUncondBranch, kLongUncondBranch);
- break;
- case kCondEQZ:
- case kCondNEZ:
- // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
- type_ = (offset_size <= kOffset23) ? kCondBranch : kLongCondBranch;
- break;
- default:
- InitShortOrLong(offset_size, kCondBranch, kLongCondBranch);
- break;
- }
- break;
- default:
- LOG(FATAL) << "Unexpected branch type " << initial_type;
- UNREACHABLE();
+void Mips64Assembler::Branch::InitializeType(Type initial_type, bool is_r6) {
+ OffsetBits offset_size_needed = GetOffsetSizeNeeded(location_, target_);
+ if (is_r6) {
+ // R6
+ switch (initial_type) {
+ case kLabel:
+ case kLiteral:
+ case kLiteralUnsigned:
+ case kLiteralLong:
+ CHECK(!IsResolved());
+ type_ = initial_type;
+ break;
+ case kCall:
+ InitShortOrLong(offset_size_needed, kCall, kLongCall);
+ break;
+ case kCondBranch:
+ switch (condition_) {
+ case kUncond:
+ InitShortOrLong(offset_size_needed, kUncondBranch, kLongUncondBranch);
+ break;
+ case kCondEQZ:
+ case kCondNEZ:
+ // Special case for beqzc/bnezc with longer offset than in other b<cond>c instructions.
+ type_ = (offset_size_needed <= kOffset23) ? kCondBranch : kLongCondBranch;
+ break;
+ default:
+ InitShortOrLong(offset_size_needed, kCondBranch, kLongCondBranch);
+ break;
+ }
+ break;
+ case kBareCall:
+ type_ = kBareCall;
+ CHECK_LE(offset_size_needed, GetOffsetSize());
+ break;
+ case kBareCondBranch:
+ type_ = (condition_ == kUncond) ? kBareUncondBranch : kBareCondBranch;
+ CHECK_LE(offset_size_needed, GetOffsetSize());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected branch type " << initial_type;
+ UNREACHABLE();
+ }
+ } else {
+ // R2
+ CHECK_EQ(initial_type, kBareCondBranch);
+ switch (condition_) {
+ case kCondLTZ:
+ case kCondGEZ:
+ case kCondLEZ:
+ case kCondGTZ:
+ case kCondEQ:
+ case kCondNE:
+ case kCondEQZ:
+ case kCondNEZ:
+ break;
+ default:
+ LOG(FATAL) << "Unexpected R2 branch condition " << condition_;
+ UNREACHABLE();
+ }
+ type_ = kR2BareCondBranch;
+ CHECK_LE(offset_size_needed, GetOffsetSize());
}
old_type_ = type_;
}
@@ -2076,21 +2199,25 @@ bool Mips64Assembler::Branch::IsUncond(BranchCondition condition,
}
}
-Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, bool is_call)
+Mips64Assembler::Branch::Branch(uint32_t location, uint32_t target, bool is_call, bool is_bare)
: old_location_(location),
location_(location),
target_(target),
lhs_reg_(ZERO),
rhs_reg_(ZERO),
condition_(kUncond) {
- InitializeType(is_call ? kCall : kCondBranch);
+ InitializeType(
+ (is_call ? (is_bare ? kBareCall : kCall) : (is_bare ? kBareCondBranch : kCondBranch)),
+ /* is_r6 */ true);
}
-Mips64Assembler::Branch::Branch(uint32_t location,
+Mips64Assembler::Branch::Branch(bool is_r6,
+ uint32_t location,
uint32_t target,
Mips64Assembler::BranchCondition condition,
GpuRegister lhs_reg,
- GpuRegister rhs_reg)
+ GpuRegister rhs_reg,
+ bool is_bare)
: old_location_(location),
location_(location),
target_(target),
@@ -2131,7 +2258,7 @@ Mips64Assembler::Branch::Branch(uint32_t location,
// Branch condition is always true, make the branch unconditional.
condition_ = kUncond;
}
- InitializeType(kCondBranch);
+ InitializeType((is_bare ? kBareCondBranch : kCondBranch), is_r6);
}
Mips64Assembler::Branch::Branch(uint32_t location, GpuRegister dest_reg, Type label_or_literal_type)
@@ -2142,7 +2269,7 @@ Mips64Assembler::Branch::Branch(uint32_t location, GpuRegister dest_reg, Type la
rhs_reg_(ZERO),
condition_(kUncond) {
CHECK_NE(dest_reg, ZERO);
- InitializeType(label_or_literal_type);
+ InitializeType(label_or_literal_type, /* is_r6 */ true);
}
Mips64Assembler::BranchCondition Mips64Assembler::Branch::OppositeCondition(
@@ -2238,12 +2365,32 @@ uint32_t Mips64Assembler::Branch::GetOldEndLocation() const {
return GetOldLocation() + GetOldSize();
}
+bool Mips64Assembler::Branch::IsBare() const {
+ switch (type_) {
+ // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+ case kBareUncondBranch:
+ case kBareCondBranch:
+ case kBareCall:
+ // R2 short branches (can't be promoted to long), delay slots filled manually.
+ case kR2BareCondBranch:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool Mips64Assembler::Branch::IsLong() const {
switch (type_) {
- // Short branches.
+ // R6 short branches (can be promoted to long).
case kUncondBranch:
case kCondBranch:
case kCall:
+ // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+ case kBareUncondBranch:
+ case kBareCondBranch:
+ case kBareCall:
+ // R2 short branches (can't be promoted to long), delay slots filled manually.
+ case kR2BareCondBranch:
// Near label.
case kLabel:
// Near literals.
@@ -2271,8 +2418,9 @@ bool Mips64Assembler::Branch::IsResolved() const {
}
Mips64Assembler::Branch::OffsetBits Mips64Assembler::Branch::GetOffsetSize() const {
+ bool r6_cond_branch = (type_ == kCondBranch || type_ == kBareCondBranch);
OffsetBits offset_size =
- (type_ == kCondBranch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
+ (r6_cond_branch && (condition_ == kCondEQZ || condition_ == kCondNEZ))
? kOffset23
: branch_info_[type_].offset_size;
return offset_size;
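(As a sanity check on these sizes — straight ISA arithmetic, not part of the
patch: the stored immediate is shifted left by 2 before use, so an N-bit
offset type spans roughly ±2^(N-1) bytes:

    kOffset18 (b<cond>c, 16-bit immediate << 2)    -> ±2^17 = ±128 KiB
    kOffset23 (beqzc/bnezc, 21-bit immediate << 2) -> ±2^22 = ±4 MiB
    kOffset28 (bc/balc, 26-bit immediate << 2)     -> ±2^27 = ±128 MiB
)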
@@ -2318,8 +2466,9 @@ void Mips64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta)
}
void Mips64Assembler::Branch::PromoteToLong() {
+ CHECK(!IsBare()); // Bare branches do not promote.
switch (type_) {
- // Short branches.
+ // R6 short branches (can be promoted to long).
case kUncondBranch:
type_ = kLongUncondBranch;
break;
@@ -2366,7 +2515,7 @@ uint32_t Mips64Assembler::Branch::PromoteIfNeeded(uint32_t max_short_distance) {
}
// The following logic is for debugging/testing purposes.
// Promote some short branches to long when it's not really required.
- if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max())) {
+ if (UNLIKELY(max_short_distance != std::numeric_limits<uint32_t>::max() && !IsBare())) {
int64_t distance = static_cast<int64_t>(target_) - location_;
distance = (distance >= 0) ? distance : -distance;
if (distance >= max_short_distance) {
@@ -2498,13 +2647,15 @@ void Mips64Assembler::FinalizeLabeledBranch(Mips64Label* label) {
}
}
-void Mips64Assembler::Buncond(Mips64Label* label) {
+void Mips64Assembler::Buncond(Mips64Label* label, bool is_bare) {
uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
- branches_.emplace_back(buffer_.Size(), target, /* is_call */ false);
+ branches_.emplace_back(buffer_.Size(), target, /* is_call */ false, is_bare);
FinalizeLabeledBranch(label);
}
void Mips64Assembler::Bcond(Mips64Label* label,
+ bool is_r6,
+ bool is_bare,
BranchCondition condition,
GpuRegister lhs,
GpuRegister rhs) {
@@ -2513,13 +2664,13 @@ void Mips64Assembler::Bcond(Mips64Label* label,
return;
}
uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
- branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs);
+ branches_.emplace_back(is_r6, buffer_.Size(), target, condition, lhs, rhs, is_bare);
FinalizeLabeledBranch(label);
}
-void Mips64Assembler::Call(Mips64Label* label) {
+void Mips64Assembler::Call(Mips64Label* label, bool is_bare) {
uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved;
- branches_.emplace_back(buffer_.Size(), target, /* is_call */ true);
+ branches_.emplace_back(buffer_.Size(), target, /* is_call */ true, is_bare);
FinalizeLabeledBranch(label);
}
@@ -2730,11 +2881,18 @@ void Mips64Assembler::PromoteBranches() {
// Note: make sure branch_info_[] and EmitBranch() are kept synchronized.
const Mips64Assembler::Branch::BranchInfo Mips64Assembler::Branch::branch_info_[] = {
- // Short branches.
+ // R6 short branches (can be promoted to long).
{ 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kUncondBranch
{ 2, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kCondBranch
// Exception: kOffset23 for beqzc/bnezc
{ 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kCall
+ // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+ { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kBareUncondBranch
+ { 1, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kBareCondBranch
+ // Exception: kOffset23 for beqzc/bnezc
+ { 1, 0, 1, Mips64Assembler::Branch::kOffset28, 2 }, // kBareCall
+ // R2 short branches (can't be promoted to long), delay slots filled manually.
+ { 1, 0, 1, Mips64Assembler::Branch::kOffset18, 2 }, // kR2BareCondBranch
// Near label.
{ 1, 0, 0, Mips64Assembler::Branch::kOffset21, 2 }, // kLabel
// Near literals.
@@ -2769,13 +2927,29 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) {
break;
case Branch::kCondBranch:
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
- EmitBcondc(condition, lhs, rhs, offset);
+ EmitBcondR6(condition, lhs, rhs, offset);
Nop(); // TODO: improve by filling the forbidden/delay slot.
break;
case Branch::kCall:
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Balc(offset);
break;
+ case Branch::kBareUncondBranch:
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Bc(offset);
+ break;
+ case Branch::kBareCondBranch:
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ EmitBcondR6(condition, lhs, rhs, offset);
+ break;
+ case Branch::kBareCall:
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ Balc(offset);
+ break;
+ case Branch::kR2BareCondBranch:
+ CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
+ EmitBcondR2(condition, lhs, rhs, offset);
+ break;
// Near label.
case Branch::kLabel:
@@ -2804,7 +2978,7 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) {
Jic(AT, Low16Bits(offset));
break;
case Branch::kLongCondBranch:
- EmitBcondc(Branch::OppositeCondition(condition), lhs, rhs, 2);
+ EmitBcondR6(Branch::OppositeCondition(condition), lhs, rhs, 2);
offset += (offset & 0x8000) << 1; // Account for sign extension in jic.
CHECK_EQ(overwrite_location_, branch->GetOffsetLocation());
Auipc(AT, High16Bits(offset));
@@ -2848,68 +3022,108 @@ void Mips64Assembler::EmitBranch(Mips64Assembler::Branch* branch) {
CHECK_LT(branch->GetSize(), static_cast<uint32_t>(Branch::kMaxBranchSize));
}
-void Mips64Assembler::Bc(Mips64Label* label) {
- Buncond(label);
+void Mips64Assembler::Bc(Mips64Label* label, bool is_bare) {
+ Buncond(label, is_bare);
+}
+
+void Mips64Assembler::Balc(Mips64Label* label, bool is_bare) {
+ Call(label, is_bare);
+}
+
+void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondLT, rs, rt);
+}
+
+void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondLTZ, rt);
+}
+
+void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondGTZ, rt);
+}
+
+void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondGE, rs, rt);
+}
+
+void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondGEZ, rt);
+}
+
+void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondLEZ, rt);
+}
+
+void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondLTU, rs, rt);
}
-void Mips64Assembler::Balc(Mips64Label* label) {
- Call(label);
+void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondGEU, rs, rt);
}
-void Mips64Assembler::Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
- Bcond(label, kCondLT, rs, rt);
+void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondEQ, rs, rt);
}
-void Mips64Assembler::Bltzc(GpuRegister rt, Mips64Label* label) {
- Bcond(label, kCondLTZ, rt);
+void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondNE, rs, rt);
}
-void Mips64Assembler::Bgtzc(GpuRegister rt, Mips64Label* label) {
- Bcond(label, kCondGTZ, rt);
+void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondEQZ, rs);
}
-void Mips64Assembler::Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
- Bcond(label, kCondGE, rs, rt);
+void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondNEZ, rs);
}
-void Mips64Assembler::Bgezc(GpuRegister rt, Mips64Label* label) {
- Bcond(label, kCondGEZ, rt);
+void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondF, static_cast<GpuRegister>(ft), ZERO);
}
-void Mips64Assembler::Blezc(GpuRegister rt, Mips64Label* label) {
- Bcond(label, kCondLEZ, rt);
+void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label, bool is_bare) {
+ Bcond(label, /* is_r6 */ true, is_bare, kCondT, static_cast<GpuRegister>(ft), ZERO);
}
-void Mips64Assembler::Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
- Bcond(label, kCondLTU, rs, rt);
+void Mips64Assembler::Bltz(GpuRegister rt, Mips64Label* label, bool is_bare) {
+ CHECK(is_bare);
+ Bcond(label, /* is_r6 */ false, is_bare, kCondLTZ, rt);
}
-void Mips64Assembler::Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
- Bcond(label, kCondGEU, rs, rt);
+void Mips64Assembler::Bgtz(GpuRegister rt, Mips64Label* label, bool is_bare) {
+ CHECK(is_bare);
+ Bcond(label, /* is_r6 */ false, is_bare, kCondGTZ, rt);
}
-void Mips64Assembler::Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
- Bcond(label, kCondEQ, rs, rt);
+void Mips64Assembler::Bgez(GpuRegister rt, Mips64Label* label, bool is_bare) {
+ CHECK(is_bare);
+ Bcond(label, /* is_r6 */ false, is_bare, kCondGEZ, rt);
}
-void Mips64Assembler::Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label) {
- Bcond(label, kCondNE, rs, rt);
+void Mips64Assembler::Blez(GpuRegister rt, Mips64Label* label, bool is_bare) {
+ CHECK(is_bare);
+ Bcond(label, /* is_r6 */ false, is_bare, kCondLEZ, rt);
}
-void Mips64Assembler::Beqzc(GpuRegister rs, Mips64Label* label) {
- Bcond(label, kCondEQZ, rs);
+void Mips64Assembler::Beq(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+ CHECK(is_bare);
+ Bcond(label, /* is_r6 */ false, is_bare, kCondEQ, rs, rt);
}
-void Mips64Assembler::Bnezc(GpuRegister rs, Mips64Label* label) {
- Bcond(label, kCondNEZ, rs);
+void Mips64Assembler::Bne(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare) {
+ CHECK(is_bare);
+ Bcond(label, /* is_r6 */ false, is_bare, kCondNE, rs, rt);
}
-void Mips64Assembler::Bc1eqz(FpuRegister ft, Mips64Label* label) {
- Bcond(label, kCondF, static_cast<GpuRegister>(ft), ZERO);
+void Mips64Assembler::Beqz(GpuRegister rs, Mips64Label* label, bool is_bare) {
+ CHECK(is_bare);
+ Bcond(label, /* is_r6 */ false, is_bare, kCondEQZ, rs);
}
-void Mips64Assembler::Bc1nez(FpuRegister ft, Mips64Label* label) {
- Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO);
+void Mips64Assembler::Bnez(GpuRegister rs, Mips64Label* label, bool is_bare) {
+ CHECK(is_bare);
+ Bcond(label, /* is_r6 */ false, is_bare, kCondNEZ, rs);
}
void Mips64Assembler::AdjustBaseAndOffset(GpuRegister& base,
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index c39d120bce..dd6dcd1896 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -86,7 +86,7 @@ static inline int InstrCountForLoadReplicatedConst32(int64_t value) {
int32_t y = High32Bits(value);
if (x == y) {
- return (IsUint<16>(x) || IsInt<16>(x) || ((x & 0xFFFF) == 0 && IsInt<16>(value >> 16))) ? 2 : 3;
+ return (IsUint<16>(x) || IsInt<16>(x) || ((x & 0xFFFF) == 0)) ? 2 : 3;
}
return INT_MAX;
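(The relaxed predicate above accepts replicated words whose low half is zero
even when the shifted value no longer fits in 16 signed bits. A worked
example — the arithmetic follows directly from the code; the lui/dinsu
pairing is the presumed two-instruction sequence:

    value = 0x1234000012340000, so x = y = 0x12340000
    old: (x & 0xFFFF) == 0 but IsInt<16>(value >> 16) fails  -> 3
    new: (x & 0xFFFF) == 0                                   -> 2

    lui   rd, 0x1234       # rd = 0x0000000012340000
    dinsu rd, rd, 32, 32   # copy the low word into the high word
)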
@@ -563,7 +563,14 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void Bnezc(GpuRegister rs, uint32_t imm21);
void Bc1eqz(FpuRegister ft, uint16_t imm16);
void Bc1nez(FpuRegister ft, uint16_t imm16);
- void Beqz(GpuRegister rt, uint16_t imm16);
+ void Beq(GpuRegister rs, GpuRegister rt, uint16_t imm16); // R2
+ void Bne(GpuRegister rs, GpuRegister rt, uint16_t imm16); // R2
+ void Beqz(GpuRegister rt, uint16_t imm16); // R2
+ void Bnez(GpuRegister rt, uint16_t imm16); // R2
+ void Bltz(GpuRegister rt, uint16_t imm16); // R2
+ void Bgez(GpuRegister rt, uint16_t imm16); // R2
+ void Blez(GpuRegister rt, uint16_t imm16); // R2
+ void Bgtz(GpuRegister rt, uint16_t imm16); // R2
void AddS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
void SubS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
@@ -599,6 +606,10 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void FloorWD(FpuRegister fd, FpuRegister fs);
void SelS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
void SelD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void SeleqzS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void SeleqzD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void SelnezS(FpuRegister fd, FpuRegister fs, FpuRegister ft);
+ void SelnezD(FpuRegister fd, FpuRegister fs, FpuRegister ft);
void RintS(FpuRegister fd, FpuRegister fs);
void RintD(FpuRegister fd, FpuRegister fs);
void ClassS(FpuRegister fd, FpuRegister fs);
@@ -922,22 +933,57 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
// the table data) and should be loaded using LoadLabelAddress().
JumpTable* CreateJumpTable(std::vector<Mips64Label*>&& labels);
- void Bc(Mips64Label* label);
- void Balc(Mips64Label* label);
- void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
- void Bltzc(GpuRegister rt, Mips64Label* label);
- void Bgtzc(GpuRegister rt, Mips64Label* label);
- void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
- void Bgezc(GpuRegister rt, Mips64Label* label);
- void Blezc(GpuRegister rt, Mips64Label* label);
- void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
- void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
- void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
- void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label);
- void Beqzc(GpuRegister rs, Mips64Label* label);
- void Bnezc(GpuRegister rs, Mips64Label* label);
- void Bc1eqz(FpuRegister ft, Mips64Label* label);
- void Bc1nez(FpuRegister ft, Mips64Label* label);
+ // When `is_bare` is false, the branches will promote to long (if the range
+ // of the individual branch instruction is insufficient) and the delay/
+ // forbidden slots will be taken care of.
+ // Use `is_bare = false` when the branch target may be out of reach of the
+ // individual branch instruction. In other words, this is for general-purpose use.
+ //
+ // When `is_bare` is true, just the branch instructions will be generated
+ // leaving delay/forbidden slot filling up to the caller and the branches
+ // won't promote to long if the range is insufficient (you'll get a
+ // compilation error when the range is exceeded).
+ // Use `is_bare = true` when the branch target is known to be within reach
+ // of the individual branch instruction. This is intended for small local
+ // optimizations around delay/forbidden slots.
+ // Also prefer using `is_bare = true` if the code near the branch is to be
+ // patched or analyzed at run time (e.g. introspection) to
+ // - show the intent and
+ // - fail during compilation rather than during patching/execution if the
+ // bare branch range is insufficient but the code size and layout are
+ // expected to remain unchanged.
+ //
+ // R6 compact branches without delay/forbidden slots.
+ void Bc(Mips64Label* label, bool is_bare = false);
+ void Balc(Mips64Label* label, bool is_bare = false);
+ // R6 compact branches with forbidden slots.
+ void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
+ void Bltzc(GpuRegister rt, Mips64Label* label, bool is_bare = false);
+ void Bgtzc(GpuRegister rt, Mips64Label* label, bool is_bare = false);
+ void Bgec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
+ void Bgezc(GpuRegister rt, Mips64Label* label, bool is_bare = false);
+ void Blezc(GpuRegister rt, Mips64Label* label, bool is_bare = false);
+ void Bltuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
+ void Bgeuc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
+ void Beqc(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
+ void Bnec(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false);
+ void Beqzc(GpuRegister rs, Mips64Label* label, bool is_bare = false);
+ void Bnezc(GpuRegister rs, Mips64Label* label, bool is_bare = false);
+ // R6 branches with delay slots.
+ void Bc1eqz(FpuRegister ft, Mips64Label* label, bool is_bare = false);
+ void Bc1nez(FpuRegister ft, Mips64Label* label, bool is_bare = false);
+ // R2 branches with delay slots that are also available on R6.
+ // The `is_bare` parameter exists and is checked in these branches only to
+ // prevent programming mistakes. These branches never promote to long, not
+ // even if `is_bare` is false.
+ void Bltz(GpuRegister rt, Mips64Label* label, bool is_bare = false); // R2
+ void Bgtz(GpuRegister rt, Mips64Label* label, bool is_bare = false); // R2
+ void Bgez(GpuRegister rt, Mips64Label* label, bool is_bare = false); // R2
+ void Blez(GpuRegister rt, Mips64Label* label, bool is_bare = false); // R2
+ void Beq(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); // R2
+ void Bne(GpuRegister rs, GpuRegister rt, Mips64Label* label, bool is_bare = false); // R2
+ void Beqz(GpuRegister rs, Mips64Label* label, bool is_bare = false); // R2
+ void Bnez(GpuRegister rs, Mips64Label* label, bool is_bare = false); // R2
void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
void AdjustBaseAndOffset(GpuRegister& base, int32_t& offset, bool is_doubleword);
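(A minimal usage sketch of the two modes — illustrative registers `in`/`out`
and the tests' `__` shorthand, not part of the patch:

    mips64::Mips64Label done;
    // Default form: the assembler appends a nop for the forbidden slot and
    // promotes to a long auipc+jic sequence if `done` is out of range.
    __ Bnezc(in, &done);

    // Bare R2 form: only the bnez itself is emitted; the caller schedules
    // the delay slot and keeps `done` within the 18-bit (±128 KiB) reach.
    __ Bnez(in, &done, /* is_bare */ true);
    __ Daddiu(out, out, 1);  // Executes in the branch delay slot.
    __ Bind(&done);
)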
@@ -1379,10 +1425,16 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
class Branch {
public:
enum Type {
- // Short branches.
+ // R6 short branches (can be promoted to long).
kUncondBranch,
kCondBranch,
kCall,
+ // R6 short branches (can't be promoted to long), forbidden/delay slots filled manually.
+ kBareUncondBranch,
+ kBareCondBranch,
+ kBareCall,
+ // R2 short branches (can't be promoted to long), delay slots filled manually.
+ kR2BareCondBranch,
// Near label.
kLabel,
// Near literals.
@@ -1425,8 +1477,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
// different origins, e.g. to PC or PC+4. Encode the origin distance (as a number of 4-byte
// instructions) from the instruction containing the offset.
uint32_t pc_org;
- // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch is
- // an exception: use kOffset23 for beqzc/bnezc).
+ // How large (in bits) a PC-relative offset can be for a given type of branch (kCondBranch
+ // and kBareCondBranch are exceptions: use kOffset23 for beqzc/bnezc).
OffsetBits offset_size;
// Some MIPS instructions with PC-relative offsets shift the offset by 2. Encode the shift
// count.
@@ -1435,13 +1487,15 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
static const BranchInfo branch_info_[/* Type */];
// Unconditional branch or call.
- Branch(uint32_t location, uint32_t target, bool is_call);
+ Branch(uint32_t location, uint32_t target, bool is_call, bool is_bare);
// Conditional branch.
- Branch(uint32_t location,
+ Branch(bool is_r6,
+ uint32_t location,
uint32_t target,
BranchCondition condition,
GpuRegister lhs_reg,
- GpuRegister rhs_reg);
+ GpuRegister rhs_reg,
+ bool is_bare);
// Label address (in literal area) or literal.
Branch(uint32_t location, GpuRegister dest_reg, Type label_or_literal_type);
@@ -1467,6 +1521,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
uint32_t GetOldSize() const;
uint32_t GetEndLocation() const;
uint32_t GetOldEndLocation() const;
+ bool IsBare() const;
bool IsLong() const;
bool IsResolved() const;
@@ -1527,7 +1582,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
private:
// Completes branch construction by determining and recording its type.
- void InitializeType(Type initial_type);
+ void InitializeType(Type initial_type, bool is_r6);
// Helper for the above.
void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type);
@@ -1554,7 +1609,8 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void EmitI26(int opcode, uint32_t imm26);
void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct);
void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm);
- void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21);
+ void EmitBcondR6(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21);
+ void EmitBcondR2(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint16_t imm16);
void EmitMsa3R(int operation,
int df,
VectorRegister wt,
@@ -1568,12 +1624,14 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
void EmitMsa2RF(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
- void Buncond(Mips64Label* label);
+ void Buncond(Mips64Label* label, bool is_bare);
void Bcond(Mips64Label* label,
+ bool is_r6,
+ bool is_bare,
BranchCondition condition,
GpuRegister lhs,
GpuRegister rhs = ZERO);
- void Call(Mips64Label* label);
+ void Call(Mips64Label* label, bool is_bare);
void FinalizeLabeledBranch(Mips64Label* label);
Branch* GetBranch(uint32_t branch_id);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 021e335697..fc0bd368ea 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -257,11 +257,46 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
return result;
}
+ void BranchHelper(void (mips64::Mips64Assembler::*f)(mips64::Mips64Label*,
+ bool),
+ const std::string& instr_name,
+ bool is_bare = false) {
+ mips64::Mips64Label label1, label2;
+ (Base::GetAssembler()->*f)(&label1, is_bare);
+ constexpr size_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label1);
+ (Base::GetAssembler()->*f)(&label2, is_bare);
+ constexpr size_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label2);
+ (Base::GetAssembler()->*f)(&label1, is_bare);
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+
+ std::string expected =
+ ".set noreorder\n" +
+ instr_name + " 1f\n" +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n" +
+ instr_name + " 2f\n" +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ "2:\n" +
+ instr_name + " 1b\n" +
+ "addu $zero, $zero, $zero\n";
+ DriverStr(expected, instr_name);
+ }
+
void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
- mips64::Mips64Label*),
- const std::string& instr_name) {
+ mips64::Mips64Label*,
+ bool),
+ const std::string& instr_name,
+ bool is_bare = false) {
mips64::Mips64Label label;
- (Base::GetAssembler()->*f)(mips64::A0, &label);
+ (Base::GetAssembler()->*f)(mips64::A0, &label, is_bare);
constexpr size_t kAdduCount1 = 63;
for (size_t i = 0; i != kAdduCount1; ++i) {
__ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
@@ -271,26 +306,30 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
for (size_t i = 0; i != kAdduCount2; ++i) {
__ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
}
- (Base::GetAssembler()->*f)(mips64::A1, &label);
+ (Base::GetAssembler()->*f)(mips64::A1, &label, is_bare);
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
std::string expected =
".set noreorder\n" +
- instr_name + " $a0, 1f\n"
- "nop\n" +
+ instr_name + " $a0, 1f\n" +
+ (is_bare ? "" : "nop\n") +
RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
"1:\n" +
RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- instr_name + " $a1, 1b\n"
- "nop\n";
+ instr_name + " $a1, 1b\n" +
+ (is_bare ? "" : "nop\n") +
+ "addu $zero, $zero, $zero\n";
DriverStr(expected, instr_name);
}
void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
mips64::GpuRegister,
- mips64::Mips64Label*),
- const std::string& instr_name) {
+ mips64::Mips64Label*,
+ bool),
+ const std::string& instr_name,
+ bool is_bare = false) {
mips64::Mips64Label label;
- (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label);
+ (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label, is_bare);
constexpr size_t kAdduCount1 = 63;
for (size_t i = 0; i != kAdduCount1; ++i) {
__ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
@@ -300,17 +339,51 @@ class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
for (size_t i = 0; i != kAdduCount2; ++i) {
__ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
}
- (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label);
+ (Base::GetAssembler()->*f)(mips64::A2, mips64::A3, &label, is_bare);
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
std::string expected =
".set noreorder\n" +
- instr_name + " $a0, $a1, 1f\n"
- "nop\n" +
+ instr_name + " $a0, $a1, 1f\n" +
+ (is_bare ? "" : "nop\n") +
RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
"1:\n" +
RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- instr_name + " $a2, $a3, 1b\n"
- "nop\n";
+ instr_name + " $a2, $a3, 1b\n" +
+ (is_bare ? "" : "nop\n") +
+ "addu $zero, $zero, $zero\n";
+ DriverStr(expected, instr_name);
+ }
+
+ void BranchFpuCondHelper(void (mips64::Mips64Assembler::*f)(mips64::FpuRegister,
+ mips64::Mips64Label*,
+ bool),
+ const std::string& instr_name,
+ bool is_bare = false) {
+ mips64::Mips64Label label;
+ (Base::GetAssembler()->*f)(mips64::F0, &label, is_bare);
+ constexpr size_t kAdduCount1 = 63;
+ for (size_t i = 0; i != kAdduCount1; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label);
+ constexpr size_t kAdduCount2 = 64;
+ for (size_t i = 0; i != kAdduCount2; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ (Base::GetAssembler()->*f)(mips64::F31, &label, is_bare);
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+
+ std::string expected =
+ ".set noreorder\n" +
+ instr_name + " $f0, 1f\n" +
+ (is_bare ? "" : "nop\n") +
+ RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
+ "1:\n" +
+ RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
+ instr_name + " $f31, 1b\n" +
+ (is_bare ? "" : "nop\n") +
+ "addu $zero, $zero, $zero\n";
DriverStr(expected, instr_name);
}
@@ -452,6 +525,26 @@ TEST_F(AssemblerMIPS64Test, SelD) {
DriverStr(RepeatFFF(&mips64::Mips64Assembler::SelD, "sel.d ${reg1}, ${reg2}, ${reg3}"), "sel.d");
}
+TEST_F(AssemblerMIPS64Test, SeleqzS) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::SeleqzS, "seleqz.s ${reg1}, ${reg2}, ${reg3}"),
+ "seleqz.s");
+}
+
+TEST_F(AssemblerMIPS64Test, SeleqzD) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::SeleqzD, "seleqz.d ${reg1}, ${reg2}, ${reg3}"),
+ "seleqz.d");
+}
+
+TEST_F(AssemblerMIPS64Test, SelnezS) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::SelnezS, "selnez.s ${reg1}, ${reg2}, ${reg3}"),
+ "selnez.s");
+}
+
+TEST_F(AssemblerMIPS64Test, SelnezD) {
+ DriverStr(RepeatFFF(&mips64::Mips64Assembler::SelnezD, "selnez.d ${reg1}, ${reg2}, ${reg3}"),
+ "selnez.d");
+}
+
TEST_F(AssemblerMIPS64Test, RintS) {
DriverStr(RepeatFF(&mips64::Mips64Assembler::RintS, "rint.s ${reg1}, ${reg2}"), "rint.s");
}
@@ -668,120 +761,21 @@ TEST_F(AssemblerMIPS64Test, Sdc1) {
"sdc1");
}
-////////////////
-// CALL / JMP //
-////////////////
+//////////////
+// BRANCHES //
+//////////////
TEST_F(AssemblerMIPS64Test, Jalr) {
DriverStr(".set noreorder\n" +
RepeatRRNoDupes(&mips64::Mips64Assembler::Jalr, "jalr ${reg1}, ${reg2}"), "jalr");
}
-TEST_F(AssemblerMIPS64Test, Balc) {
- mips64::Mips64Label label1, label2;
- __ Balc(&label1);
- constexpr size_t kAdduCount1 = 63;
- for (size_t i = 0; i != kAdduCount1; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
- __ Bind(&label1);
- __ Balc(&label2);
- constexpr size_t kAdduCount2 = 64;
- for (size_t i = 0; i != kAdduCount2; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
- __ Bind(&label2);
- __ Balc(&label1);
-
- std::string expected =
- ".set noreorder\n"
- "balc 1f\n" +
- RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
- "1:\n"
- "balc 2f\n" +
- RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- "2:\n"
- "balc 1b\n";
- DriverStr(expected, "Balc");
-}
-
-TEST_F(AssemblerMIPS64Test, LongBalc) {
- constexpr uint32_t kNopCount1 = (1u << 25) + 1;
- constexpr uint32_t kNopCount2 = (1u << 25) + 1;
- constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u;
- ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity);
- __ GetBuffer()->ExtendCapacity(kRequiredCapacity);
- mips64::Mips64Label label1, label2;
- __ Balc(&label1);
- for (uint32_t i = 0; i != kNopCount1; ++i) {
- __ Nop();
- }
- __ Bind(&label1);
- __ Balc(&label2);
- for (uint32_t i = 0; i != kNopCount2; ++i) {
- __ Nop();
- }
- __ Bind(&label2);
- __ Balc(&label1);
-
- uint32_t offset_forward1 = 2 + kNopCount1; // 2: account for auipc and jialc.
- offset_forward1 <<= 2;
- offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in jialc.
-
- uint32_t offset_forward2 = 2 + kNopCount2; // 2: account for auipc and jialc.
- offset_forward2 <<= 2;
- offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in jialc.
-
- uint32_t offset_back = -(2 + kNopCount2); // 2: account for auipc and jialc.
- offset_back <<= 2;
- offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jialc.
-
- // Note, we're using the ".fill" directive to tell the assembler to generate many NOPs
- // instead of generating them ourselves in the source code. This saves a few minutes
- // of test time.
- std::ostringstream oss;
- oss <<
- ".set noreorder\n"
- "auipc $at, 0x" << std::hex << High16Bits(offset_forward1) << "\n"
- "jialc $at, 0x" << std::hex << Low16Bits(offset_forward1) << "\n"
- ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n"
- "1:\n"
- "auipc $at, 0x" << std::hex << High16Bits(offset_forward2) << "\n"
- "jialc $at, 0x" << std::hex << Low16Bits(offset_forward2) << "\n"
- ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n"
- "2:\n"
- "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
- "jialc $at, 0x" << std::hex << Low16Bits(offset_back) << "\n";
- std::string expected = oss.str();
- DriverStr(expected, "LongBalc");
-}
-
TEST_F(AssemblerMIPS64Test, Bc) {
- mips64::Mips64Label label1, label2;
- __ Bc(&label1);
- constexpr size_t kAdduCount1 = 63;
- for (size_t i = 0; i != kAdduCount1; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
- __ Bind(&label1);
- __ Bc(&label2);
- constexpr size_t kAdduCount2 = 64;
- for (size_t i = 0; i != kAdduCount2; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
- __ Bind(&label2);
- __ Bc(&label1);
+ BranchHelper(&mips64::Mips64Assembler::Bc, "Bc");
+}
- std::string expected =
- ".set noreorder\n"
- "bc 1f\n" +
- RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
- "1:\n"
- "bc 2f\n" +
- RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- "2:\n"
- "bc 1b\n";
- DriverStr(expected, "Bc");
+TEST_F(AssemblerMIPS64Test, Balc) {
+ BranchHelper(&mips64::Mips64Assembler::Balc, "Balc");
}
TEST_F(AssemblerMIPS64Test, Beqzc) {
@@ -833,55 +827,107 @@ TEST_F(AssemblerMIPS64Test, Bgeuc) {
}
TEST_F(AssemblerMIPS64Test, Bc1eqz) {
- mips64::Mips64Label label;
- __ Bc1eqz(mips64::F0, &label);
- constexpr size_t kAdduCount1 = 63;
- for (size_t i = 0; i != kAdduCount1; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
- __ Bind(&label);
- constexpr size_t kAdduCount2 = 64;
- for (size_t i = 0; i != kAdduCount2; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
- __ Bc1eqz(mips64::F31, &label);
-
- std::string expected =
- ".set noreorder\n"
- "bc1eqz $f0, 1f\n"
- "nop\n" +
- RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
- "1:\n" +
- RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- "bc1eqz $f31, 1b\n"
- "nop\n";
- DriverStr(expected, "Bc1eqz");
+ BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1eqz, "Bc1eqz");
}
TEST_F(AssemblerMIPS64Test, Bc1nez) {
- mips64::Mips64Label label;
- __ Bc1nez(mips64::F0, &label);
- constexpr size_t kAdduCount1 = 63;
- for (size_t i = 0; i != kAdduCount1; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
- __ Bind(&label);
- constexpr size_t kAdduCount2 = 64;
- for (size_t i = 0; i != kAdduCount2; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
- __ Bc1nez(mips64::F31, &label);
+ BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1nez, "Bc1nez");
+}
- std::string expected =
- ".set noreorder\n"
- "bc1nez $f0, 1f\n"
- "nop\n" +
- RepeatInsn(kAdduCount1, "addu $zero, $zero, $zero\n") +
- "1:\n" +
- RepeatInsn(kAdduCount2, "addu $zero, $zero, $zero\n") +
- "bc1nez $f31, 1b\n"
- "nop\n";
- DriverStr(expected, "Bc1nez");
+TEST_F(AssemblerMIPS64Test, BareBc) {
+ BranchHelper(&mips64::Mips64Assembler::Bc, "Bc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBalc) {
+ BranchHelper(&mips64::Mips64Assembler::Balc, "Balc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBeqzc) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqzc, "Beqzc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBnezc) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnezc, "Bnezc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBltzc) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltzc, "Bltzc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBgezc) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgezc, "Bgezc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBlezc) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Blezc, "Blezc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBgtzc) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtzc, "Bgtzc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBeqc) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beqc, "Beqc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBnec) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bnec, "Bnec", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBltc) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltc, "Bltc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBgec) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgec, "Bgec", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBltuc) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bltuc, "Bltuc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBgeuc) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bgeuc, "Bgeuc", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBc1eqz) {
+ BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1eqz, "Bc1eqz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBc1nez) {
+ BranchFpuCondHelper(&mips64::Mips64Assembler::Bc1nez, "Bc1nez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBeqz) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Beqz, "Beqz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBnez) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Bnez, "Bnez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBltz) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Bltz, "Bltz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBgez) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgez, "Bgez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBlez) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Blez, "Blez", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBgtz) {
+ BranchCondOneRegHelper(&mips64::Mips64Assembler::Bgtz, "Bgtz", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBeq) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Beq, "Beq", /* is_bare */ true);
+}
+
+TEST_F(AssemblerMIPS64Test, BareBne) {
+ BranchCondTwoRegsHelper(&mips64::Mips64Assembler::Bne, "Bne", /* is_bare */ true);
}
TEST_F(AssemblerMIPS64Test, LongBeqc) {
@@ -924,6 +970,102 @@ TEST_F(AssemblerMIPS64Test, LongBeqc) {
DriverStr(expected, "LongBeqc");
}
+TEST_F(AssemblerMIPS64Test, LongBeqzc) {
+ constexpr uint32_t kNopCount1 = (1u << 20) + 1;
+ constexpr uint32_t kNopCount2 = (1u << 20) + 1;
+ constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u;
+ ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity);
+ __ GetBuffer()->ExtendCapacity(kRequiredCapacity);
+ mips64::Mips64Label label;
+ __ Beqzc(mips64::A0, &label);
+ for (uint32_t i = 0; i != kNopCount1; ++i) {
+ __ Nop();
+ }
+ __ Bind(&label);
+ for (uint32_t i = 0; i != kNopCount2; ++i) {
+ __ Nop();
+ }
+ __ Beqzc(mips64::A2, &label);
+
+ uint32_t offset_forward = 2 + kNopCount1; // 2: account for auipc and jic.
+ offset_forward <<= 2;
+ offset_forward += (offset_forward & 0x8000) << 1; // Account for sign extension in jic.
+
+ uint32_t offset_back = -(kNopCount2 + 1); // 1: account for bnezc.
+ offset_back <<= 2;
+ offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jic.
+
+ // Note, we're using the ".fill" directive to tell the assembler to generate many NOPs
+ // instead of generating them ourselves in the source code. This saves test time.
+ std::ostringstream oss;
+ oss <<
+ ".set noreorder\n"
+ "bnezc $a0, 1f\n"
+ "auipc $at, 0x" << std::hex << High16Bits(offset_forward) << "\n"
+ "jic $at, 0x" << std::hex << Low16Bits(offset_forward) << "\n"
+ "1:\n" <<
+ ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n"
+ "2:\n" <<
+ ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n"
+ "bnezc $a2, 3f\n"
+ "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+ "jic $at, 0x" << std::hex << Low16Bits(offset_back) << "\n"
+ "3:\n";
+ std::string expected = oss.str();
+ DriverStr(expected, "LongBeqzc");
+}
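(The offsets this test expects can be checked by hand — pure arithmetic from
the constants above, not part of the patch:

    kNopCount1 = kNopCount2 = 2^20 + 1 = 0x100001
    offset_forward = (2 + 0x100001) << 2 = 0x40000C
      low half 0x000C has bit 15 clear, no adjustment:
        auipc $at, 0x40 ; jic $at, 0xC
    offset_back = -(0x100001 + 1) << 2 = 0xFFBFFFF8 (mod 2^32)
      low half 0xFFF8 has bit 15 set, so the upper half is bumped:
        0xFFBFFFF8 + 0x10000 = 0xFFC0FFF8
        auipc $at, 0xFFC0 ; jic $at, 0xFFF8
)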
+
+TEST_F(AssemblerMIPS64Test, LongBalc) {
+ constexpr uint32_t kNopCount1 = (1u << 25) + 1;
+ constexpr uint32_t kNopCount2 = (1u << 25) + 1;
+ constexpr uint32_t kRequiredCapacity = (kNopCount1 + kNopCount2 + 6u) * 4u;
+ ASSERT_LT(__ GetBuffer()->Capacity(), kRequiredCapacity);
+ __ GetBuffer()->ExtendCapacity(kRequiredCapacity);
+ mips64::Mips64Label label1, label2;
+ __ Balc(&label1);
+ for (uint32_t i = 0; i != kNopCount1; ++i) {
+ __ Nop();
+ }
+ __ Bind(&label1);
+ __ Balc(&label2);
+ for (uint32_t i = 0; i != kNopCount2; ++i) {
+ __ Nop();
+ }
+ __ Bind(&label2);
+ __ Balc(&label1);
+
+ uint32_t offset_forward1 = 2 + kNopCount1; // 2: account for auipc and jialc.
+ offset_forward1 <<= 2;
+ offset_forward1 += (offset_forward1 & 0x8000) << 1; // Account for sign extension in jialc.
+
+ uint32_t offset_forward2 = 2 + kNopCount2; // 2: account for auipc and jialc.
+ offset_forward2 <<= 2;
+ offset_forward2 += (offset_forward2 & 0x8000) << 1; // Account for sign extension in jialc.
+
+ uint32_t offset_back = -(2 + kNopCount2); // 2: account for auipc and jialc.
+ offset_back <<= 2;
+ offset_back += (offset_back & 0x8000) << 1; // Account for sign extension in jialc.
+
+ // Note, we're using the ".fill" directive to tell the assembler to generate many NOPs
+ // instead of generating them ourselves in the source code. This saves a few minutes
+ // of test time.
+ std::ostringstream oss;
+ oss <<
+ ".set noreorder\n"
+ "auipc $at, 0x" << std::hex << High16Bits(offset_forward1) << "\n"
+ "jialc $at, 0x" << std::hex << Low16Bits(offset_forward1) << "\n"
+ ".fill 0x" << std::hex << kNopCount1 << " , 4, 0\n"
+ "1:\n"
+ "auipc $at, 0x" << std::hex << High16Bits(offset_forward2) << "\n"
+ "jialc $at, 0x" << std::hex << Low16Bits(offset_forward2) << "\n"
+ ".fill 0x" << std::hex << kNopCount2 << " , 4, 0\n"
+ "2:\n"
+ "auipc $at, 0x" << std::hex << High16Bits(offset_back) << "\n"
+ "jialc $at, 0x" << std::hex << Low16Bits(offset_back) << "\n";
+ std::string expected = oss.str();
+ DriverStr(expected, "LongBalc");
+}
+
//////////
// MISC //
//////////
@@ -961,235 +1103,6 @@ TEST_F(AssemblerMIPS64Test, Addiupc) {
DriverStr(RepeatRIb(&mips64::Mips64Assembler::Addiupc, 19, code), "Addiupc");
}
-TEST_F(AssemblerMIPS64Test, LoadFarthestNearLabelAddress) {
- mips64::Mips64Label label;
- __ LoadLabelAddress(mips64::V0, &label);
- constexpr uint32_t kAdduCount = 0x3FFDE;
- for (uint32_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
- __ Bind(&label);
-
- std::string expected =
- "lapc $v0, 1f\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "1:\n";
- DriverStr(expected, "LoadFarthestNearLabelAddress");
- EXPECT_EQ(__ GetLabelLocation(&label), (1 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadNearestFarLabelAddress) {
- mips64::Mips64Label label;
- __ LoadLabelAddress(mips64::V0, &label);
- constexpr uint32_t kAdduCount = 0x3FFDF;
- for (uint32_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
- __ Bind(&label);
-
- std::string expected =
- "1:\n"
- "auipc $at, %hi(2f - 1b)\n"
- "daddiu $v0, $at, %lo(2f - 1b)\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "2:\n";
- DriverStr(expected, "LoadNearestFarLabelAddress");
- EXPECT_EQ(__ GetLabelLocation(&label), (2 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteral) {
- mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
- __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal);
- constexpr uint32_t kAdduCount = 0x3FFDE;
- for (uint32_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
-
- std::string expected =
- "lwpc $v0, 1f\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "1:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadFarthestNearLiteral");
- EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteral) {
- mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
- __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal);
- constexpr uint32_t kAdduCount = 0x3FFDF;
- for (uint32_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
-
- std::string expected =
- "1:\n"
- "auipc $at, %hi(2f - 1b)\n"
- "lw $v0, %lo(2f - 1b)($at)\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadNearestFarLiteral");
- EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralUnsigned) {
- mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
- __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal);
- constexpr uint32_t kAdduCount = 0x3FFDE;
- for (uint32_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
-
- std::string expected =
- "lwupc $v0, 1f\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "1:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadFarthestNearLiteralUnsigned");
- EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralUnsigned) {
- mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
- __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal);
- constexpr uint32_t kAdduCount = 0x3FFDF;
- for (uint32_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
-
- std::string expected =
- "1:\n"
- "auipc $at, %hi(2f - 1b)\n"
- "lwu $v0, %lo(2f - 1b)($at)\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "2:\n"
- ".word 0x12345678\n";
- DriverStr(expected, "LoadNearestFarLiteralUnsigned");
- EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralLong) {
- mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
- __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal);
- constexpr uint32_t kAdduCount = 0x3FFDD;
- for (uint32_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
-
- std::string expected =
- "ldpc $v0, 1f\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "1:\n"
- ".dword 0x0123456789ABCDEF\n";
- DriverStr(expected, "LoadFarthestNearLiteralLong");
- EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralLong) {
- mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
- __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal);
- constexpr uint32_t kAdduCount = 0x3FFDE;
- for (uint32_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
-
- std::string expected =
- "1:\n"
- "auipc $at, %hi(2f - 1b)\n"
- "ld $v0, %lo(2f - 1b)($at)\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "2:\n"
- ".dword 0x0123456789ABCDEF\n";
- DriverStr(expected, "LoadNearestFarLiteralLong");
- EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4);
-}
-
-TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNop) {
- mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
- mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555));
- mips64::Literal* literal3 = __ NewLiteral<uint64_t>(UINT64_C(0xAAAAAAAAAAAAAAAA));
- __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1);
- __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2);
- __ LoadLiteral(mips64::A3, mips64::kLoadDoubleword, literal3);
- __ LoadLabelAddress(mips64::V0, literal1->GetLabel());
- __ LoadLabelAddress(mips64::V1, literal2->GetLabel());
- // A nop will be inserted here before the 64-bit literals.
-
- std::string expected =
- "ldpc $a1, 1f\n"
- // The GNU assembler incorrectly requires the ldpc instruction to be located
- // at an address that's a multiple of 8. TODO: Remove this workaround if/when
- // the assembler is fixed.
- // "ldpc $a2, 2f\n"
- ".word 0xECD80004\n"
- "ldpc $a3, 3f\n"
- "lapc $v0, 1f\n"
- "lapc $v1, 2f\n"
- "nop\n"
- "1:\n"
- ".dword 0x0123456789ABCDEF\n"
- "2:\n"
- ".dword 0x5555555555555555\n"
- "3:\n"
- ".dword 0xAAAAAAAAAAAAAAAA\n";
- DriverStr(expected, "LongLiteralAlignmentNop");
- EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 6 * 4u);
- EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 8 * 4u);
- EXPECT_EQ(__ GetLabelLocation(literal3->GetLabel()), 10 * 4u);
-}
-
-TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNoNop) {
- mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
- mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555));
- __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1);
- __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2);
- __ LoadLabelAddress(mips64::V0, literal1->GetLabel());
- __ LoadLabelAddress(mips64::V1, literal2->GetLabel());
-
- std::string expected =
- "ldpc $a1, 1f\n"
- // The GNU assembler incorrectly requires the ldpc instruction to be located
- // at an address that's a multiple of 8. TODO: Remove this workaround if/when
- // the assembler is fixed.
- // "ldpc $a2, 2f\n"
- ".word 0xECD80003\n"
- "lapc $v0, 1f\n"
- "lapc $v1, 2f\n"
- "1:\n"
- ".dword 0x0123456789ABCDEF\n"
- "2:\n"
- ".dword 0x5555555555555555\n";
- DriverStr(expected, "LongLiteralAlignmentNoNop");
- EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 4 * 4u);
- EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 6 * 4u);
-}
-
-TEST_F(AssemblerMIPS64Test, FarLongLiteralAlignmentNop) {
- mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
- __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal);
- __ LoadLabelAddress(mips64::V1, literal->GetLabel());
- constexpr uint32_t kAdduCount = 0x3FFDF;
- for (uint32_t i = 0; i != kAdduCount; ++i) {
- __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
- }
- // A nop will be inserted here before the 64-bit literal.
-
- std::string expected =
- "1:\n"
- "auipc $at, %hi(3f - 1b)\n"
- "ld $v0, %lo(3f - 1b)($at)\n"
- "2:\n"
- "auipc $at, %hi(3f - 2b)\n"
- "daddiu $v1, $at, %lo(3f - 2b)\n" +
- RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
- "nop\n"
- "3:\n"
- ".dword 0x0123456789ABCDEF\n";
- DriverStr(expected, "FarLongLiteralAlignmentNop");
- EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (5 + kAdduCount) * 4);
-}
-
TEST_F(AssemblerMIPS64Test, Addu) {
DriverStr(RepeatRRR(&mips64::Mips64Assembler::Addu, "addu ${reg1}, ${reg2}, ${reg3}"), "addu");
}
@@ -2740,6 +2653,235 @@ TEST_F(AssemblerMIPS64Test, LoadConst64) {
EXPECT_EQ(tester.GetPathsCovered(), art::mips64::kLoadConst64PathAllPaths);
}
+TEST_F(AssemblerMIPS64Test, LoadFarthestNearLabelAddress) {
+ mips64::Mips64Label label;
+ __ LoadLabelAddress(mips64::V0, &label);
+ constexpr uint32_t kAdduCount = 0x3FFDE;
+ for (uint32_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label);
+
+ std::string expected =
+ "lapc $v0, 1f\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "1:\n";
+ DriverStr(expected, "LoadFarthestNearLabelAddress");
+ EXPECT_EQ(__ GetLabelLocation(&label), (1 + kAdduCount) * 4);
+}
+
+TEST_F(AssemblerMIPS64Test, LoadNearestFarLabelAddress) {
+ mips64::Mips64Label label;
+ __ LoadLabelAddress(mips64::V0, &label);
+ constexpr uint32_t kAdduCount = 0x3FFDF;
+ for (uint32_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ __ Bind(&label);
+
+ std::string expected =
+ "1:\n"
+ "auipc $at, %hi(2f - 1b)\n"
+ "daddiu $v0, $at, %lo(2f - 1b)\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "2:\n";
+ DriverStr(expected, "LoadNearestFarLabelAddress");
+ EXPECT_EQ(__ GetLabelLocation(&label), (2 + kAdduCount) * 4);
+}
+
+TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteral) {
+ mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal);
+ constexpr uint32_t kAdduCount = 0x3FFDE;
+ for (uint32_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+
+ std::string expected =
+ "lwpc $v0, 1f\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "1:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadFarthestNearLiteral");
+ EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4);
+}
+
+TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteral) {
+ mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ LoadLiteral(mips64::V0, mips64::kLoadWord, literal);
+ constexpr uint32_t kAdduCount = 0x3FFDF;
+ for (uint32_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+
+ std::string expected =
+ "1:\n"
+ "auipc $at, %hi(2f - 1b)\n"
+ "lw $v0, %lo(2f - 1b)($at)\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "2:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadNearestFarLiteral");
+ EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4);
+}
+
+TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralUnsigned) {
+ mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal);
+ constexpr uint32_t kAdduCount = 0x3FFDE;
+ for (uint32_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+
+ std::string expected =
+ "lwupc $v0, 1f\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "1:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadFarthestNearLiteralUnsigned");
+ EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4);
+}
+
+TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralUnsigned) {
+ mips64::Literal* literal = __ NewLiteral<uint32_t>(0x12345678);
+ __ LoadLiteral(mips64::V0, mips64::kLoadUnsignedWord, literal);
+ constexpr uint32_t kAdduCount = 0x3FFDF;
+ for (uint32_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+
+ std::string expected =
+ "1:\n"
+ "auipc $at, %hi(2f - 1b)\n"
+ "lwu $v0, %lo(2f - 1b)($at)\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "2:\n"
+ ".word 0x12345678\n";
+ DriverStr(expected, "LoadNearestFarLiteralUnsigned");
+ EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4);
+}
+
+TEST_F(AssemblerMIPS64Test, LoadFarthestNearLiteralLong) {
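+  // The doubleword literal loaded by ldpc must be 8-byte aligned, which is
+  // presumably why the boundary count differs from the 32-bit literal tests.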
+ mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
+ __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal);
+ constexpr uint32_t kAdduCount = 0x3FFDD;
+ for (uint32_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+
+ std::string expected =
+ "ldpc $v0, 1f\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "1:\n"
+ ".dword 0x0123456789ABCDEF\n";
+ DriverStr(expected, "LoadFarthestNearLiteralLong");
+ EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (1 + kAdduCount) * 4);
+}
+
+TEST_F(AssemblerMIPS64Test, LoadNearestFarLiteralLong) {
+ mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
+ __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal);
+ constexpr uint32_t kAdduCount = 0x3FFDE;
+ for (uint32_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+
+ std::string expected =
+ "1:\n"
+ "auipc $at, %hi(2f - 1b)\n"
+ "ld $v0, %lo(2f - 1b)($at)\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "2:\n"
+ ".dword 0x0123456789ABCDEF\n";
+ DriverStr(expected, "LoadNearestFarLiteralLong");
+ EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (2 + kAdduCount) * 4);
+}
+
+TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNop) {
+ mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
+ mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555));
+ mips64::Literal* literal3 = __ NewLiteral<uint64_t>(UINT64_C(0xAAAAAAAAAAAAAAAA));
+ __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1);
+ __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2);
+ __ LoadLiteral(mips64::A3, mips64::kLoadDoubleword, literal3);
+ __ LoadLabelAddress(mips64::V0, literal1->GetLabel());
+ __ LoadLabelAddress(mips64::V1, literal2->GetLabel());
+ // A nop will be inserted here before the 64-bit literals.
+
+ std::string expected =
+ "ldpc $a1, 1f\n"
+ // The GNU assembler incorrectly requires the ldpc instruction to be located
+ // at an address that's a multiple of 8. TODO: Remove this workaround if/when
+ // the assembler is fixed.
+ // "ldpc $a2, 2f\n"
+ ".word 0xECD80004\n"
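+      // 0xECD80004 is the raw encoding of the commented-out "ldpc $a2, 2f".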
+ "ldpc $a3, 3f\n"
+ "lapc $v0, 1f\n"
+ "lapc $v1, 2f\n"
+ "nop\n"
+ "1:\n"
+ ".dword 0x0123456789ABCDEF\n"
+ "2:\n"
+ ".dword 0x5555555555555555\n"
+ "3:\n"
+ ".dword 0xAAAAAAAAAAAAAAAA\n";
+ DriverStr(expected, "LongLiteralAlignmentNop");
+ EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 6 * 4u);
+ EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 8 * 4u);
+ EXPECT_EQ(__ GetLabelLocation(literal3->GetLabel()), 10 * 4u);
+}
+
+TEST_F(AssemblerMIPS64Test, LongLiteralAlignmentNoNop) {
+ mips64::Literal* literal1 = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
+ mips64::Literal* literal2 = __ NewLiteral<uint64_t>(UINT64_C(0x5555555555555555));
+ __ LoadLiteral(mips64::A1, mips64::kLoadDoubleword, literal1);
+ __ LoadLiteral(mips64::A2, mips64::kLoadDoubleword, literal2);
+ __ LoadLabelAddress(mips64::V0, literal1->GetLabel());
+ __ LoadLabelAddress(mips64::V1, literal2->GetLabel());
+
+ std::string expected =
+ "ldpc $a1, 1f\n"
+ // The GNU assembler incorrectly requires the ldpc instruction to be located
+ // at an address that's a multiple of 8. TODO: Remove this workaround if/when
+ // the assembler is fixed.
+ // "ldpc $a2, 2f\n"
+ ".word 0xECD80003\n"
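+      // 0xECD80003 is the raw encoding of "ldpc $a2, 2f", as above.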
+ "lapc $v0, 1f\n"
+ "lapc $v1, 2f\n"
+ "1:\n"
+ ".dword 0x0123456789ABCDEF\n"
+ "2:\n"
+ ".dword 0x5555555555555555\n";
+ DriverStr(expected, "LongLiteralAlignmentNoNop");
+ EXPECT_EQ(__ GetLabelLocation(literal1->GetLabel()), 4 * 4u);
+ EXPECT_EQ(__ GetLabelLocation(literal2->GetLabel()), 6 * 4u);
+}
+
+TEST_F(AssemblerMIPS64Test, FarLongLiteralAlignmentNop) {
+ mips64::Literal* literal = __ NewLiteral<uint64_t>(UINT64_C(0x0123456789ABCDEF));
+ __ LoadLiteral(mips64::V0, mips64::kLoadDoubleword, literal);
+ __ LoadLabelAddress(mips64::V1, literal->GetLabel());
+ constexpr uint32_t kAdduCount = 0x3FFDF;
+ for (uint32_t i = 0; i != kAdduCount; ++i) {
+ __ Addu(mips64::ZERO, mips64::ZERO, mips64::ZERO);
+ }
+ // A nop will be inserted here before the 64-bit literal.
+
+ std::string expected =
+ "1:\n"
+ "auipc $at, %hi(3f - 1b)\n"
+ "ld $v0, %lo(3f - 1b)($at)\n"
+ "2:\n"
+ "auipc $at, %hi(3f - 2b)\n"
+ "daddiu $v1, $at, %lo(3f - 2b)\n" +
+ RepeatInsn(kAdduCount, "addu $zero, $zero, $zero\n") +
+ "nop\n"
+ "3:\n"
+ ".dword 0x0123456789ABCDEF\n";
+ DriverStr(expected, "FarLongLiteralAlignmentNop");
+ EXPECT_EQ(__ GetLabelLocation(literal->GetLabel()), (5 + kAdduCount) * 4);
+}
+
// MSA instructions.
TEST_F(AssemblerMIPS64Test, AndV) {
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index b50f1af8f9..b89af10749 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1606,6 +1606,42 @@ void X86Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
}
+void X86Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
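+  // SSE2 encoding 66 0F 68 (PUNPCKHBW); the sibling high-unpack instructions
+  // below differ only in the final opcode byte (69, 6A, 6D).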
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x68);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x69);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x6A);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0x6D);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
DCHECK(shift_count.is_uint8());
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 8578340ea7..511eeb9973 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -546,6 +546,11 @@ class X86Assembler FINAL : public Assembler {
void punpckldq(XmmRegister dst, XmmRegister src);
void punpcklqdq(XmmRegister dst, XmmRegister src);
+ void punpckhbw(XmmRegister dst, XmmRegister src);
+ void punpckhwd(XmmRegister dst, XmmRegister src);
+ void punpckhdq(XmmRegister dst, XmmRegister src);
+ void punpckhqdq(XmmRegister dst, XmmRegister src);
+
void psllw(XmmRegister reg, const Immediate& shift_count);
void pslld(XmmRegister reg, const Immediate& shift_count);
void psllq(XmmRegister reg, const Immediate& shift_count);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 3e1244ed5d..d2122db3fa 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -777,6 +777,22 @@ TEST_F(AssemblerX86Test, Punpcklqdq) {
DriverStr(RepeatFF(&x86::X86Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
}
+TEST_F(AssemblerX86Test, Punpckhbw) {
+ DriverStr(RepeatFF(&x86::X86Assembler::punpckhbw, "punpckhbw %{reg2}, %{reg1}"), "punpckhbw");
+}
+
+TEST_F(AssemblerX86Test, Punpckhwd) {
+ DriverStr(RepeatFF(&x86::X86Assembler::punpckhwd, "punpckhwd %{reg2}, %{reg1}"), "punpckhwd");
+}
+
+TEST_F(AssemblerX86Test, Punpckhdq) {
+ DriverStr(RepeatFF(&x86::X86Assembler::punpckhdq, "punpckhdq %{reg2}, %{reg1}"), "punpckhdq");
+}
+
+TEST_F(AssemblerX86Test, Punpckhqdq) {
+ DriverStr(RepeatFF(&x86::X86Assembler::punpckhqdq, "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq");
+}
+
TEST_F(AssemblerX86Test, psllw) {
GetAssembler()->psllw(x86::XMM0, CreateImmediate(16));
DriverStr("psllw $0x10, %xmm0\n", "psllwi");
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index ea69a1c9be..3bff67d2f2 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1835,6 +1835,46 @@ void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
}
+void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
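+  // Same 66 0F 68 encoding as the x86 version; EmitOptionalRex32 adds a REX
+  // prefix only when xmm8-xmm15 are involved.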
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x68);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x69);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x6A);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
+void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0x6D);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
DCHECK(shift_count.is_uint8());
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1931,6 +1971,18 @@ void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
}
+void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
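+  // SSE2 encoding 66 0F 73 /3 ib: the reg-field value 3 passed to
+  // EmitXmmRegisterOperand selects PSRLDQ within the shift-immediate group.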
+ DCHECK(shift_count.is_uint8());
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex(false, false, false, false, reg.NeedsRex());
+ EmitUint8(0x0F);
+ EmitUint8(0x73);
+ EmitXmmRegisterOperand(3, reg);
+ EmitUint8(shift_count.value());
+}
+
+
void X86_64Assembler::fldl(const Address& src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xDD);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 41450bff4f..3dab235d1c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -574,6 +574,11 @@ class X86_64Assembler FINAL : public Assembler {
void punpckldq(XmmRegister dst, XmmRegister src);
void punpcklqdq(XmmRegister dst, XmmRegister src);
+ void punpckhbw(XmmRegister dst, XmmRegister src);
+ void punpckhwd(XmmRegister dst, XmmRegister src);
+ void punpckhdq(XmmRegister dst, XmmRegister src);
+ void punpckhqdq(XmmRegister dst, XmmRegister src);
+
void psllw(XmmRegister reg, const Immediate& shift_count);
void pslld(XmmRegister reg, const Immediate& shift_count);
void psllq(XmmRegister reg, const Immediate& shift_count);
@@ -585,6 +590,7 @@ class X86_64Assembler FINAL : public Assembler {
void psrlw(XmmRegister reg, const Immediate& shift_count);
void psrld(XmmRegister reg, const Immediate& shift_count);
void psrlq(XmmRegister reg, const Immediate& shift_count);
+ void psrldq(XmmRegister reg, const Immediate& shift_count);
void flds(const Address& src);
void fstps(const Address& dst);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index ec14e7a825..85afee0746 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1465,6 +1465,22 @@ TEST_F(AssemblerX86_64Test, Punpcklqdq) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
}
+TEST_F(AssemblerX86_64Test, Punpckhbw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhbw, "punpckhbw %{reg2}, %{reg1}"), "punpckhbw");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckhwd) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhwd, "punpckhwd %{reg2}, %{reg1}"), "punpckhwd");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckhdq) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhdq, "punpckhdq %{reg2}, %{reg1}"), "punpckhdq");
+}
+
+TEST_F(AssemblerX86_64Test, Punpckhqdq) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhqdq, "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq");
+}
+
TEST_F(AssemblerX86_64Test, Psllw) {
GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
GetAssembler()->psllw(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
@@ -1521,6 +1537,13 @@ TEST_F(AssemblerX86_64Test, Psrlq) {
"psrlq $2, %xmm15\n", "pslrqi");
}
+TEST_F(AssemblerX86_64Test, Psrldq) {
+ GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
+ GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
+ DriverStr("psrldq $1, %xmm0\n"
+            "psrldq $2, %xmm15\n", "psrldqi");
+}
+
TEST_F(AssemblerX86_64Test, UcomissAddress) {
GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(
x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
@@ -2012,7 +2035,7 @@ std::string buildframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test
x86_64::X86_64ManagedRegister method_reg = ManagedFromCpu(x86_64::RDI);
size_t frame_size = 10 * kStackAlignment;
- assembler->BuildFrame(10 * kStackAlignment, method_reg, spill_regs, entry_spills);
+ assembler->BuildFrame(frame_size, method_reg, spill_regs, entry_spills);
// Construct assembly text counterpart.
std::ostringstream str;
@@ -2048,7 +2071,7 @@ std::string removeframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_tes
ArrayRef<const ManagedRegister> spill_regs(raw_spill_regs);
size_t frame_size = 10 * kStackAlignment;
- assembler->RemoveFrame(10 * kStackAlignment, spill_regs);
+ assembler->RemoveFrame(frame_size, spill_regs);
// Construct assembly text counterpart.
std::ostringstream str;
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index 65389252e2..5c097da16f 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -624,7 +624,7 @@ TEST_F(VerifierDepsTest, ConstClass_Resolved) {
}
TEST_F(VerifierDepsTest, ConstClass_Unresolved) {
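+  // Verification is now expected to fail (presumably a soft failure) when
+  // the referenced class cannot be resolved; likewise for the other
+  // *_Unresolved tests below.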
- ASSERT_TRUE(VerifyMethod("ConstClass_Unresolved"));
+ ASSERT_FALSE(VerifyMethod("ConstClass_Unresolved"));
ASSERT_TRUE(HasClass("LUnresolvedClass;", false));
}
@@ -634,7 +634,7 @@ TEST_F(VerifierDepsTest, CheckCast_Resolved) {
}
TEST_F(VerifierDepsTest, CheckCast_Unresolved) {
- ASSERT_TRUE(VerifyMethod("CheckCast_Unresolved"));
+ ASSERT_FALSE(VerifyMethod("CheckCast_Unresolved"));
ASSERT_TRUE(HasClass("LUnresolvedClass;", false));
}
@@ -644,7 +644,7 @@ TEST_F(VerifierDepsTest, InstanceOf_Resolved) {
}
TEST_F(VerifierDepsTest, InstanceOf_Unresolved) {
- ASSERT_TRUE(VerifyMethod("InstanceOf_Unresolved"));
+ ASSERT_FALSE(VerifyMethod("InstanceOf_Unresolved"));
ASSERT_TRUE(HasClass("LUnresolvedClass;", false));
}
@@ -654,12 +654,12 @@ TEST_F(VerifierDepsTest, NewInstance_Resolved) {
}
TEST_F(VerifierDepsTest, NewInstance_Unresolved) {
- ASSERT_TRUE(VerifyMethod("NewInstance_Unresolved"));
+ ASSERT_FALSE(VerifyMethod("NewInstance_Unresolved"));
ASSERT_TRUE(HasClass("LUnresolvedClass;", false));
}
TEST_F(VerifierDepsTest, NewArray_Unresolved) {
- ASSERT_TRUE(VerifyMethod("NewArray_Unresolved"));
+ ASSERT_FALSE(VerifyMethod("NewArray_Unresolved"));
ASSERT_TRUE(HasClass("[LUnresolvedClass;", false));
}