Diffstat (limited to 'compiler')
152 files changed, 21036 insertions, 4785 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp index a879bd8f06..6472613cfe 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -28,6 +28,81 @@ package { default_applicable_licenses: ["art_license"], } +// Common dependencies for libart-compiler_deps and libartd-compiler_deps. +cc_defaults { + name: "libart-compiler_common_deps", + shared_libs: [ + "libbase", + "liblzma", // libelffile(d) dependency; must be repeated here since it's a static lib. + "libartpalette", + ], + header_libs: [ + "libart_generated_headers", + ], +} + +// Dependencies of libart-compiler, used to propagate libart-compiler deps when static linking. +art_cc_defaults { + name: "libart-compiler_deps", + defaults: ["libart-compiler_common_deps"], + shared_libs: [ + "libartbase", + "libprofile", + "libdexfile", + ], + static_libs: ["libelffile"], + codegen: { + arm: { + // VIXL assembly support for ARM targets. + static_libs: [ + "libvixl", + ], + }, + arm64: { + // VIXL assembly support for ARM64 targets. + static_libs: [ + "libvixl", + ], + }, + }, + runtime_libs: [ + // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load + // `libart-disassembler.so`. + "libart-disassembler", + ], +} + +// Dependencies of libartd-compiler, used to propagate libartd-compiler deps when static linking. +art_cc_defaults { + name: "libartd-compiler_deps", + defaults: ["libart-compiler_common_deps"], + shared_libs: [ + "libartbased", + "libprofiled", + "libdexfiled", + ], + static_libs: ["libelffiled"], + codegen: { + arm: { + // VIXL assembly support for ARM targets. + static_libs: [ + "libvixld", + ], + }, + arm64: { + // VIXL assembly support for ARM64 targets. + static_libs: [ + "libvixld", + ], + }, + }, + runtime_libs: [ + // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load + // `libartd-disassembler.so`. + "libartd-disassembler", + ], +} + art_cc_defaults { name: "libart-compiler-defaults", defaults: ["art_defaults"], @@ -46,6 +121,7 @@ art_cc_defaults { "optimizing/bounds_check_elimination.cc", "optimizing/builder.cc", "optimizing/cha_guard_optimization.cc", + "optimizing/code_generation_data.cc", "optimizing/code_generator.cc", "optimizing/code_generator_utils.cc", "optimizing/code_sinking.cc", @@ -80,7 +156,6 @@ art_cc_defaults { "optimizing/reference_type_propagation.cc", "optimizing/register_allocation_resolver.cc", "optimizing/register_allocator.cc", - "optimizing/register_allocator_graph_color.cc", "optimizing/register_allocator_linear_scan.cc", "optimizing/select_generator.cc", "optimizing/scheduler.cc", @@ -122,6 +197,7 @@ art_cc_defaults { "optimizing/code_generator_arm64.cc", "optimizing/code_generator_vector_arm64_neon.cc", "optimizing/code_generator_vector_arm64_sve.cc", + "optimizing/jit_patches_arm64.cc", "optimizing/scheduler_arm64.cc", "optimizing/instruction_simplifier_arm64.cc", "optimizing/intrinsics_arm64.cc", @@ -132,6 +208,11 @@ art_cc_defaults { }, riscv64: { srcs: [ + "jni/quick/riscv64/calling_convention_riscv64.cc", + "optimizing/code_generator_riscv64.cc", + "optimizing/intrinsics_riscv64.cc", + "utils/riscv64/assembler_riscv64.cc", + "utils/riscv64/jni_macro_assembler_riscv64.cc", "utils/riscv64/managed_register_riscv64.cc", ], }, @@ -210,35 +291,13 @@ art_cc_library { "libart-compiler-defaults", "dex2oat-pgo-defaults", "art_hugepage_defaults", + "libart-compiler_deps", ], - codegen: { - arm: { - // VIXL assembly support for ARM targets. 
- static_libs: [ - "libvixl", - ], - }, - arm64: { - // VIXL assembly support for ARM64 targets. - static_libs: [ - "libvixl", - ], - }, - }, shared_libs: [ + // libart is not included in libart-compiler_deps to allow libart-compiler(-for-test) + // select suitable libart library (either with or without LTO). "libart", - "libartbase", - "libartpalette", - "libprofile", - "libdexfile", ], - static_libs: ["libelffile"], - runtime_libs: [ - // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load - // `libart-disassembler.so`. - "libart-disassembler", - ], - target: { android: { lto: { @@ -253,6 +312,22 @@ art_cc_library { ], } +// For static linking with gtests. Same as `libart-compiler`, but without LTO. +// When gtests static link a library with LTO enabled, they are also built with LTO. +// This makes the build process use a lot of memory. b/277207452 +art_cc_library_static { + name: "libart-compiler-for-test", + defaults: [ + "libart-compiler-defaults", + "dex2oat-pgo-defaults", + "art_hugepage_defaults", + "libart-compiler_deps", + ], + header_libs: [ + "libart_headers", + ], +} + cc_defaults { name: "libart-compiler_static_defaults", defaults: [ @@ -266,48 +341,32 @@ cc_defaults { whole_static_libs: ["libart-compiler"], } +// libart-compiler_static_defaults for standalone gtests. +// Uses libart-for-test_static_defaults instead of libart_static_defaults. +// Uses libart-compiler-for-test instead of libart-compiler. +cc_defaults { + name: "libart-compiler-for-test_static_defaults", + defaults: [ + "libart-compiler_static_base_defaults", + "libart-disassembler_static_defaults", + "libart-for-test_static_defaults", + "libartbase_static_defaults", + "libdexfile_static_defaults", + "libprofile_static_defaults", + ], + whole_static_libs: ["libart-compiler-for-test"], +} + art_cc_library { name: "libartd-compiler", defaults: [ "art_debug_defaults", "libart-compiler-defaults", + "libartd-compiler_deps", ], - codegen: { - arm: { - // VIXL assembly support for ARM targets. - static_libs: [ - "libvixld", - ], - // Export vixl headers as they are included in this library's headers used by tests. - export_static_lib_headers: [ - "libvixld", - ], - }, - arm64: { - // VIXL assembly support for ARM64 targets. - static_libs: [ - "libvixld", - ], - // Export vixl headers as they are included in this library's headers used by tests. - export_static_lib_headers: [ - "libvixld", - ], - }, - }, shared_libs: [ - "libartbased", "libartd", - "libartpalette", - "libprofiled", - "libdexfiled", ], - static_libs: ["libelffiled"], - runtime_libs: [ - // `art::HGraphVisualizerDisassembler::HGraphVisualizerDisassembler` may dynamically load - // `libartd-disassembler.so`. - "libartd-disassembler", - ], - apex_available: [ "com.android.art.debug", // TODO(b/183882457): This lib doesn't go into com.android.art, but @@ -330,42 +389,47 @@ cc_defaults { whole_static_libs: ["libartd-compiler"], } +// libartd-compiler_static_defaults for gtests. +// Uses libartd-for-test_static_defaults instead of libart_static_defaults. +// Note that `libartd-compiler-for-test` is not required here, because `libartd-compiler` +// doesn't use LTO. 
+cc_defaults { + name: "libartd-compiler-for-test_static_defaults", + defaults: [ + "libart-compiler_static_base_defaults", + "libartbased_static_defaults", + "libartd-disassembler_static_defaults", + "libartd-for-test_static_defaults", + "libdexfiled_static_defaults", + "libprofiled_static_defaults", + ], + whole_static_libs: ["libartd-compiler"], +} + // Properties common to `libart-compiler-gtest` and `libartd-compiler-gtest`. art_cc_defaults { name: "libart-compiler-gtest-common", srcs: [ "common_compiler_test.cc", ], - shared_libs: [ - "libbase", - ], } -art_cc_library { +art_cc_library_static { name: "libart-compiler-gtest", defaults: [ "libart-gtest-defaults", "libart-compiler-gtest-common", - ], - shared_libs: [ - "libart-compiler", - "libart-disassembler", - "libartbase-art-gtest", - "libart-runtime-gtest", + "libart-compiler-for-test_static_defaults", ], } -art_cc_library { +art_cc_library_static { name: "libartd-compiler-gtest", defaults: [ - "libartd-gtest-defaults", + "art_debug_defaults", + "libart-gtest-defaults", "libart-compiler-gtest-common", - ], - shared_libs: [ - "libartd-compiler", - "libartd-disassembler", - "libartbased-art-gtest", - "libartd-runtime-gtest", + "libartd-compiler-for-test_static_defaults", ], } @@ -474,11 +538,6 @@ art_cc_defaults { "libnativehelper_header_only", ], - shared_libs: [ - "libnativeloader", - "libunwindstack", - ], - target: { host: { shared_libs: [ @@ -496,15 +555,8 @@ art_cc_test { "art_gtest_defaults", "art_compiler_tests_defaults", ], - shared_libs: [ - "libprofiled", - "libartd-simulator-container", - "liblzma", - ], static_libs: [ - "libartd-compiler", - "libelffiled", - "libvixld", + "libartd-simulator-container", ], } @@ -516,21 +568,8 @@ art_cc_test { "art_compiler_tests_defaults", ], data: [":generate-boot-image"], - shared_libs: [ - "libprofile", - "liblzma", - "libartpalette", - ], static_libs: [ - // For now, link `libart-simulator-container` statically for simplicity, - // to save the added complexity to package it in test suites (along with - // other test artifacts) and install it on device during tests. - // TODO(b/192070541): Consider linking `libart-simulator-container` - // dynamically. 
"libart-simulator-container", - "libart-compiler", - "libelffile", - "libvixl", ], test_config: "art_standalone_compiler_tests.xml", } @@ -551,6 +590,12 @@ art_cc_test { "utils/assembler_thumb_test.cc", ], }, + riscv64: { + srcs: [ + "utils/riscv64/assembler_riscv64_test.cc", + "utils/riscv64/jni_macro_assembler_riscv64_test.cc", + ], + }, x86: { srcs: [ "utils/x86/assembler_x86_test.cc", @@ -562,12 +607,7 @@ art_cc_test { ], }, }, - shared_libs: [ - "liblzma", - ], static_libs: [ - "libartd-compiler", - "libelffiled", "libvixld", ], } diff --git a/compiler/art_standalone_compiler_tests.xml b/compiler/art_standalone_compiler_tests.xml index 394ac8d4fb..8e8636cca4 100644 --- a/compiler/art_standalone_compiler_tests.xml +++ b/compiler/art_standalone_compiler_tests.xml @@ -15,6 +15,7 @@ --> <configuration description="Runs art_standalone_compiler_tests."> <option name="config-descriptor:metadata" key="mainline-param" value="com.google.android.art.apex" /> + <option name="config-descriptor:metadata" key="mainline-param" value="com.android.art.apex" /> <target_preparer class="com.android.compatibility.common.tradefed.targetprep.FilePusher"> <option name="cleanup" value="true" /> diff --git a/compiler/cfi_test.h b/compiler/cfi_test.h index e65bee8e2e..6835e92dfd 100644 --- a/compiler/cfi_test.h +++ b/compiler/cfi_test.h @@ -131,7 +131,7 @@ class CFITest : public dwarf::DwarfTest { } // Use the .cfi_ prefix. new_line = ".cfi_" + new_line.substr(FindEndOf(new_line, "DW_CFA_")); - output->push_back(address + ": " + new_line); + output->push_back(ART_FORMAT("{}: {}", address, new_line)); } } } diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc index 442b96e5fa..a37f516759 100644 --- a/compiler/common_compiler_test.cc +++ b/compiler/common_compiler_test.cc @@ -133,9 +133,9 @@ class CommonCompilerTestImpl::OneCompiledMethodStorage final : public CompiledCo CompiledMethod* CreateCompiledMethod(InstructionSet instruction_set, ArrayRef<const uint8_t> code, ArrayRef<const uint8_t> stack_map, - ArrayRef<const uint8_t> cfi ATTRIBUTE_UNUSED, + [[maybe_unused]] ArrayRef<const uint8_t> cfi, ArrayRef<const linker::LinkerPatch> patches, - bool is_intrinsic ATTRIBUTE_UNUSED) override { + [[maybe_unused]] bool is_intrinsic) override { // Supports only one method at a time. 
CHECK_EQ(instruction_set_, InstructionSet::kNone); CHECK_NE(instruction_set, InstructionSet::kNone); @@ -150,15 +150,15 @@ class CommonCompilerTestImpl::OneCompiledMethodStorage final : public CompiledCo return reinterpret_cast<CompiledMethod*>(this); } - ArrayRef<const uint8_t> GetThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED, - /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) override { + ArrayRef<const uint8_t> GetThunkCode([[maybe_unused]] const linker::LinkerPatch& patch, + [[maybe_unused]] /*out*/ std::string* debug_name) override { LOG(FATAL) << "Unsupported."; UNREACHABLE(); } - void SetThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED, - ArrayRef<const uint8_t> code ATTRIBUTE_UNUSED, - const std::string& debug_name ATTRIBUTE_UNUSED) override { + void SetThunkCode([[maybe_unused]] const linker::LinkerPatch& patch, + [[maybe_unused]] ArrayRef<const uint8_t> code, + [[maybe_unused]] const std::string& debug_name) override { LOG(FATAL) << "Unsupported."; UNREACHABLE(); } diff --git a/compiler/compiler.h b/compiler/compiler.h index ce785bb769..6c317f7e02 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -73,12 +73,12 @@ class Compiler { const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const = 0; - virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED, - jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED, - jit::JitMemoryRegion* region ATTRIBUTE_UNUSED, - ArtMethod* method ATTRIBUTE_UNUSED, - CompilationKind compilation_kind ATTRIBUTE_UNUSED, - jit::JitLogger* jit_logger ATTRIBUTE_UNUSED) + virtual bool JitCompile([[maybe_unused]] Thread* self, + [[maybe_unused]] jit::JitCodeCache* code_cache, + [[maybe_unused]] jit::JitMemoryRegion* region, + [[maybe_unused]] ArtMethod* method, + [[maybe_unused]] CompilationKind compilation_kind, + [[maybe_unused]] jit::JitLogger* jit_logger) REQUIRES_SHARED(Locks::mutator_lock_) { return false; } diff --git a/compiler/compiler_reflection_test.cc b/compiler/compiler_reflection_test.cc index f3c07db136..d8e2b9e5b9 100644 --- a/compiler/compiler_reflection_test.cc +++ b/compiler/compiler_reflection_test.cc @@ -29,6 +29,7 @@ namespace art HIDDEN { class CompilerReflectionTest : public CommonCompilerTest {}; TEST_F(CompilerReflectionTest, StaticMainMethod) { + TEST_DISABLED_FOR_RISCV64(); ScopedObjectAccess soa(Thread::Current()); jobject jclass_loader = LoadDex("Main"); StackHandleScope<1> hs(soa.Self()); diff --git a/compiler/debug/elf_debug_frame_writer.h b/compiler/debug/elf_debug_frame_writer.h index 6b72262e26..fe98a578b1 100644 --- a/compiler/debug/elf_debug_frame_writer.h +++ b/compiler/debug/elf_debug_frame_writer.h @@ -90,7 +90,26 @@ static void WriteCIE(InstructionSet isa, /*inout*/ std::vector<uint8_t>* buffer) return; } case InstructionSet::kRiscv64: { - UNIMPLEMENTED(FATAL); + dwarf::DebugFrameOpCodeWriter<> opcodes; + opcodes.DefCFA(Reg::Riscv64Core(2), 0); // X2(SP). + // core registers. + for (int reg = 3; reg < 32; reg++) { // Skip X0 (Zero), X1 (RA) and X2 (SP). + if ((reg >= 5 && reg < 8) || (reg >= 10 && reg < 18) || reg >= 28) { + opcodes.Undefined(Reg::Riscv64Core(reg)); + } else { + opcodes.SameValue(Reg::Riscv64Core(reg)); + } + } + // fp registers. + for (int reg = 0; reg < 32; reg++) { + if (reg < 8 || (reg >=10 && reg < 18) || reg >= 28) { + opcodes.Undefined(Reg::Riscv64Fp(reg)); + } else { + opcodes.SameValue(Reg::Riscv64Fp(reg)); + } + } + auto return_reg = Reg::Riscv64Core(1); // X1(RA). 
+ WriteCIE(is64bit, return_reg, opcodes, buffer); return; } case InstructionSet::kX86: { diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h index 4896bc1e9b..5d654e3e06 100644 --- a/compiler/debug/elf_debug_line_writer.h +++ b/compiler/debug/elf_debug_line_writer.h @@ -194,7 +194,7 @@ class ElfDebugLineWriter { } else { directory_index = it->second; } - full_path = package_name + "/" + file_name; + full_path = ART_FORMAT("{}/{}", package_name, file_name); } // Add file entry. diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc index 8f64d73aa7..505b6c5d8a 100644 --- a/compiler/debug/elf_debug_writer.cc +++ b/compiler/debug/elf_debug_writer.cc @@ -113,7 +113,7 @@ void WriteDebugInfo(ElfBuilder<ElfTypes>* builder, template <typename ElfTypes> static std::vector<uint8_t> MakeMiniDebugInfoInternal( InstructionSet isa, - const InstructionSetFeatures* features ATTRIBUTE_UNUSED, + [[maybe_unused]] const InstructionSetFeatures* features, typename ElfTypes::Addr text_section_address, size_t text_section_size, typename ElfTypes::Addr dex_section_address, @@ -172,11 +172,10 @@ std::vector<uint8_t> MakeMiniDebugInfo( } } -std::vector<uint8_t> MakeElfFileForJIT( - InstructionSet isa, - const InstructionSetFeatures* features ATTRIBUTE_UNUSED, - bool mini_debug_info, - const MethodDebugInfo& method_info) { +std::vector<uint8_t> MakeElfFileForJIT(InstructionSet isa, + [[maybe_unused]] const InstructionSetFeatures* features, + bool mini_debug_info, + const MethodDebugInfo& method_info) { using ElfTypes = ElfRuntimeTypes; CHECK_EQ(sizeof(ElfTypes::Addr), static_cast<size_t>(GetInstructionSetPointerSize(isa))); CHECK_EQ(method_info.is_code_address_text_relative, false); @@ -213,13 +212,12 @@ std::vector<uint8_t> MakeElfFileForJIT( DCHECK_EQ(sym.st_size, method_info.code_size); num_syms++; }); - reader.VisitDebugFrame([&](const Reader::CIE* cie ATTRIBUTE_UNUSED) { - num_cies++; - }, [&](const Reader::FDE* fde, const Reader::CIE* cie ATTRIBUTE_UNUSED) { - DCHECK_EQ(fde->sym_addr, method_info.code_address); - DCHECK_EQ(fde->sym_size, method_info.code_size); - num_fdes++; - }); + reader.VisitDebugFrame([&]([[maybe_unused]] const Reader::CIE* cie) { num_cies++; }, + [&](const Reader::FDE* fde, [[maybe_unused]] const Reader::CIE* cie) { + DCHECK_EQ(fde->sym_addr, method_info.code_address); + DCHECK_EQ(fde->sym_size, method_info.code_size); + num_fdes++; + }); DCHECK_EQ(num_syms, 1u); DCHECK_LE(num_cies, 1u); DCHECK_LE(num_fdes, 1u); @@ -302,18 +300,20 @@ std::vector<uint8_t> PackElfFileForJIT( // ART always produces the same CIE, so we copy the first one and ignore the rest. 
bool copied_cie = false; for (Reader& reader : readers) { - reader.VisitDebugFrame([&](const Reader::CIE* cie) { - if (!copied_cie) { - debug_frame->WriteFully(cie->data(), cie->size()); - copied_cie = true; - } - }, [&](const Reader::FDE* fde, const Reader::CIE* cie ATTRIBUTE_UNUSED) { - DCHECK(copied_cie); - DCHECK_EQ(fde->cie_pointer, 0); - if (!is_removed_symbol(fde->sym_addr)) { - debug_frame->WriteFully(fde->data(), fde->size()); - } - }); + reader.VisitDebugFrame( + [&](const Reader::CIE* cie) { + if (!copied_cie) { + debug_frame->WriteFully(cie->data(), cie->size()); + copied_cie = true; + } + }, + [&](const Reader::FDE* fde, [[maybe_unused]] const Reader::CIE* cie) { + DCHECK(copied_cie); + DCHECK_EQ(fde->cie_pointer, 0); + if (!is_removed_symbol(fde->sym_addr)) { + debug_frame->WriteFully(fde->data(), fde->size()); + } + }); } debug_frame->End(); @@ -348,9 +348,8 @@ std::vector<uint8_t> PackElfFileForJIT( std::vector<uint8_t> WriteDebugElfFileForClasses( InstructionSet isa, - const InstructionSetFeatures* features ATTRIBUTE_UNUSED, - const ArrayRef<mirror::Class*>& types) - REQUIRES_SHARED(Locks::mutator_lock_) { + [[maybe_unused]] const InstructionSetFeatures* features, + const ArrayRef<mirror::Class*>& types) REQUIRES_SHARED(Locks::mutator_lock_) { using ElfTypes = ElfRuntimeTypes; CHECK_EQ(sizeof(ElfTypes::Addr), static_cast<size_t>(GetInstructionSetPointerSize(isa))); std::vector<uint8_t> buffer; diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index 603596f3bc..d0770e952b 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -57,7 +57,7 @@ CompilerOptions::CompilerOptions() generate_debug_info_(kDefaultGenerateDebugInfo), generate_mini_debug_info_(kDefaultGenerateMiniDebugInfo), generate_build_id_(false), - implicit_null_checks_(true), + implicit_null_checks_(false), implicit_so_checks_(true), implicit_suspend_checks_(false), compile_pic_(false), @@ -121,7 +121,8 @@ bool CompilerOptions::ParseRegisterAllocationStrategy(const std::string& option, if (option == "linear-scan") { register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorLinearScan; } else if (option == "graph-color") { - register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorGraphColor; + LOG(ERROR) << "Graph coloring allocator has been removed, using linear scan instead."; + register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorLinearScan; } else { *error_msg = "Unrecognized register allocation strategy. 
Try linear-scan, or graph-color."; return false; diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index c8a41ce24b..a5b3ae17d0 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -42,6 +42,7 @@ class VerifierDepsTest; namespace linker { class Arm64RelativePatcherTest; +class Thumb2RelativePatcherTest; } // namespace linker class ArtMethod; @@ -115,9 +116,7 @@ class CompilerOptions final { } bool IsAnyCompilationEnabled() const { - return CompilerFilter::IsAnyCompilationEnabled(compiler_filter_) && - // TODO(riscv64): remove this when we have compiler support for RISC-V - GetInstructionSet() != InstructionSet::kRiscv64; + return CompilerFilter::IsAnyCompilationEnabled(compiler_filter_); } size_t GetHugeMethodThreshold() const { @@ -504,6 +503,7 @@ class CompilerOptions final { friend class jit::JitCompiler; friend class verifier::VerifierDepsTest; friend class linker::Arm64RelativePatcherTest; + friend class linker::Thumb2RelativePatcherTest; template <class Base> friend bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string* error_msg); diff --git a/compiler/driver/compiler_options_map-inl.h b/compiler/driver/compiler_options_map-inl.h index 79a59625f5..8530df37e4 100644 --- a/compiler/driver/compiler_options_map-inl.h +++ b/compiler/driver/compiler_options_map-inl.h @@ -117,7 +117,7 @@ inline bool ReadCompilerOptions(Base& map, CompilerOptions* options, std::string #pragma GCC diagnostic ignored "-Wframe-larger-than=" template <typename Map, typename Builder> -inline void AddCompilerOptionsArgumentParserOptions(Builder& b) { +NO_INLINE void AddCompilerOptionsArgumentParserOptions(Builder& b) { // clang-format off b. Define("--compiler-filter=_") diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc index 82c4998217..75ade55799 100644 --- a/compiler/exception_test.cc +++ b/compiler/exception_test.cc @@ -69,9 +69,10 @@ class ExceptionTest : public CommonRuntimeTest { dex_ = my_klass_->GetDexCache()->GetDexFile(); + std::vector<uint8_t> fake_code; uint32_t code_size = 12; for (size_t i = 0 ; i < code_size; i++) { - fake_code_.push_back(0x70 | i); + fake_code.push_back(0x70 | i); } const uint32_t native_pc_offset = 4u; @@ -96,16 +97,23 @@ class ExceptionTest : public CommonRuntimeTest { const size_t header_size = sizeof(OatQuickMethodHeader); const size_t code_alignment = GetInstructionSetCodeAlignment(kRuntimeISA); - fake_header_code_and_maps_.resize(stack_maps_size + header_size + code_size + code_alignment); - // NB: The start of the vector might not have been allocated the desired alignment. + fake_header_code_and_maps_size_ = stack_maps_size + header_size + code_size + code_alignment; + // Use mmap to make sure we get untagged memory here. Real code gets allocated using + // mspace_memalign which is never tagged. 
+ fake_header_code_and_maps_ = static_cast<uint8_t*>(mmap(nullptr, + fake_header_code_and_maps_size_, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, + 0)); uint8_t* code_ptr = AlignUp(&fake_header_code_and_maps_[stack_maps_size + header_size], code_alignment); memcpy(&fake_header_code_and_maps_[0], stack_map.data(), stack_maps_size); - OatQuickMethodHeader method_header(code_ptr - fake_header_code_and_maps_.data()); + OatQuickMethodHeader method_header(code_ptr - fake_header_code_and_maps_); static_assert(std::is_trivially_copyable<OatQuickMethodHeader>::value, "Cannot use memcpy"); memcpy(code_ptr - header_size, &method_header, header_size); - memcpy(code_ptr, fake_code_.data(), fake_code_.size()); + memcpy(code_ptr, fake_code.data(), fake_code.size()); if (kRuntimeISA == InstructionSet::kArm) { // Check that the Thumb2 adjustment will be a NOP, see EntryPointToCodePointer(). @@ -123,10 +131,12 @@ class ExceptionTest : public CommonRuntimeTest { method_g_->SetEntryPointFromQuickCompiledCode(code_ptr); } + void TearDown() override { munmap(fake_header_code_and_maps_, fake_header_code_and_maps_size_); } + const DexFile* dex_; - std::vector<uint8_t> fake_code_; - std::vector<uint8_t> fake_header_code_and_maps_; + size_t fake_header_code_and_maps_size_; + uint8_t* fake_header_code_and_maps_; ArtMethod* method_f_; ArtMethod* method_g_; diff --git a/compiler/jit/jit_logger.h b/compiler/jit/jit_logger.h index 9d1f3073fa..79f47f817f 100644 --- a/compiler/jit/jit_logger.h +++ b/compiler/jit/jit_logger.h @@ -53,7 +53,7 @@ namespace jit { // // Command line Example: // $ perf record -k mono dalvikvm -Xcompiler-option --generate-debug-info -cp <classpath> Test -// $ perf inject -i perf.data -o perf.data.jitted +// $ perf inject -j -i perf.data -o perf.data.jitted // $ perf report -i perf.data.jitted // $ perf annotate -i perf.data.jitted // NOTE: diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc index 70cf2d4eb0..ae5f2d0aa9 100644 --- a/compiler/jni/jni_cfi_test.cc +++ b/compiler/jni/jni_cfi_test.cc @@ -99,7 +99,7 @@ class JNICFITest : public CFITest { jni_asm->FinalizeCode(); std::vector<uint8_t> actual_asm(jni_asm->CodeSize()); MemoryRegion code(&actual_asm[0], actual_asm.size()); - jni_asm->FinalizeInstructions(code); + jni_asm->CopyInstructions(code); ASSERT_EQ(jni_asm->cfi().GetCurrentCFAOffset(), frame_size); const std::vector<uint8_t>& actual_cfi = *(jni_asm->cfi().data()); diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 397db251b8..40989b2999 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -175,9 +175,8 @@ size_t count_nonnull_refs_single_helper(T arg, // SFINAE for non-ref-types. Always 0. 
template <typename T> -size_t count_nonnull_refs_single_helper(T arg ATTRIBUTE_UNUSED, - typename std::enable_if<!jni_type_traits<T>::is_ref>::type* - = nullptr) { +size_t count_nonnull_refs_single_helper( + [[maybe_unused]] T arg, typename std::enable_if<!jni_type_traits<T>::is_ref>::type* = nullptr) { return 0; } @@ -591,10 +590,9 @@ struct ScopedCheckHandleScope { class CountReferencesVisitor : public RootVisitor { public: - void VisitRoots(mirror::Object*** roots ATTRIBUTE_UNUSED, + void VisitRoots([[maybe_unused]] mirror::Object*** roots, size_t count, - const RootInfo& info) override - REQUIRES_SHARED(Locks::mutator_lock_) { + const RootInfo& info) override REQUIRES_SHARED(Locks::mutator_lock_) { if (info.GetType() == art::RootType::kRootJavaFrame) { const JavaFrameRootInfo& jrfi = static_cast<const JavaFrameRootInfo&>(info); if (jrfi.GetVReg() == JavaFrameRootInfo::kNativeReferenceArgument) { @@ -604,10 +602,9 @@ class CountReferencesVisitor : public RootVisitor { } } - void VisitRoots(mirror::CompressedReference<mirror::Object>** roots ATTRIBUTE_UNUSED, - size_t count ATTRIBUTE_UNUSED, - const RootInfo& info) override - REQUIRES_SHARED(Locks::mutator_lock_) { + void VisitRoots([[maybe_unused]] mirror::CompressedReference<mirror::Object>** roots, + [[maybe_unused]] size_t count, + const RootInfo& info) override REQUIRES_SHARED(Locks::mutator_lock_) { CHECK_NE(info.GetType(), art::RootType::kRootJavaFrame); } @@ -980,8 +977,8 @@ void JniCompilerTest::CompileAndRunIntObjectObjectMethodImpl() { JNI_TEST(CompileAndRunIntObjectObjectMethod) int gJava_MyClassNatives_fooSII_calls[kJniKindCount] = {}; -jint Java_MyClassNatives_fooSII(JNIEnv* env ATTRIBUTE_UNUSED, - jclass klass ATTRIBUTE_UNUSED, +jint Java_MyClassNatives_fooSII([[maybe_unused]] JNIEnv* env, + [[maybe_unused]] jclass klass, jint x, jint y) { gJava_MyClassNatives_fooSII_calls[gCurrentJni]++; @@ -1003,8 +1000,8 @@ void JniCompilerTest::CompileAndRunStaticIntIntMethodImpl() { JNI_TEST_CRITICAL(CompileAndRunStaticIntIntMethod) int gJava_MyClassNatives_fooSDD_calls[kJniKindCount] = {}; -jdouble Java_MyClassNatives_fooSDD(JNIEnv* env ATTRIBUTE_UNUSED, - jclass klass ATTRIBUTE_UNUSED, +jdouble Java_MyClassNatives_fooSDD([[maybe_unused]] JNIEnv* env, + [[maybe_unused]] jclass klass, jdouble x, jdouble y) { gJava_MyClassNatives_fooSDD_calls[gCurrentJni]++; @@ -1676,8 +1673,8 @@ void JniCompilerTest::CompileAndRunFloatFloatMethodImpl() { JNI_TEST(CompileAndRunFloatFloatMethod) -void Java_MyClassNatives_checkParameterAlign(JNIEnv* env ATTRIBUTE_UNUSED, - jobject thisObj ATTRIBUTE_UNUSED, +void Java_MyClassNatives_checkParameterAlign([[maybe_unused]] JNIEnv* env, + [[maybe_unused]] jobject thisObj, jint i1, jlong l1) { EXPECT_EQ(i1, 1234); diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index e716502911..cd6aac517d 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -323,7 +323,7 @@ ArrayRef<const ManagedRegister> Arm64JniCallingConvention::CalleeSaveRegisters() static_assert(kCalleeSaveRegisters[lr_index].Equals( Arm64ManagedRegister::FromXRegister(LR))); return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters).SubArray( - /*pos*/ lr_index, /*length=*/ 1u); + /*pos=*/ lr_index, /*length=*/ 1u); } } else { return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters); diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc index 
2b9da6ba1a..459beb0c67 100644 --- a/compiler/jni/quick/calling_convention.cc +++ b/compiler/jni/quick/calling_convention.cc @@ -29,6 +29,10 @@ #include "jni/quick/arm64/calling_convention_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +#include "jni/quick/riscv64/calling_convention_riscv64.h" +#endif + #ifdef ART_ENABLE_CODEGEN_x86 #include "jni/quick/x86/calling_convention_x86.h" #endif @@ -61,6 +65,12 @@ std::unique_ptr<ManagedRuntimeCallingConvention> ManagedRuntimeCallingConvention new (allocator) arm64::Arm64ManagedRuntimeCallingConvention( is_static, is_synchronized, shorty)); #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case InstructionSet::kRiscv64: + return std::unique_ptr<ManagedRuntimeCallingConvention>( + new (allocator) riscv64::Riscv64ManagedRuntimeCallingConvention( + is_static, is_synchronized, shorty)); +#endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: return std::unique_ptr<ManagedRuntimeCallingConvention>( @@ -114,7 +124,7 @@ bool ManagedRuntimeCallingConvention::IsCurrentArgPossiblyNull() { } size_t ManagedRuntimeCallingConvention::CurrentParamSize() { - return ParamSize(itr_args_); + return ParamSize(itr_args_, /*reference_size=*/ sizeof(mirror::HeapReference<mirror::Object>)); } bool ManagedRuntimeCallingConvention::IsCurrentParamAReference() { @@ -156,6 +166,12 @@ std::unique_ptr<JniCallingConvention> JniCallingConvention::Create(ArenaAllocato new (allocator) arm64::Arm64JniCallingConvention( is_static, is_synchronized, is_fast_native, is_critical_native, shorty)); #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case InstructionSet::kRiscv64: + return std::unique_ptr<JniCallingConvention>( + new (allocator) riscv64::Riscv64JniCallingConvention( + is_static, is_synchronized, is_fast_native, is_critical_native, shorty)); +#endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: return std::unique_ptr<JniCallingConvention>( @@ -188,7 +204,7 @@ bool JniCallingConvention::HasNext() { if (IsCurrentArgExtraForJni()) { return true; } else { - unsigned int arg_pos = GetIteratorPositionWithinShorty(); + size_t arg_pos = GetIteratorPositionWithinShorty(); return arg_pos < NumArgs(); } } @@ -220,7 +236,7 @@ bool JniCallingConvention::IsCurrentParamAReference() { &return_value)) { return return_value; } else { - int arg_pos = GetIteratorPositionWithinShorty(); + size_t arg_pos = GetIteratorPositionWithinShorty(); return IsParamAReference(arg_pos); } } @@ -242,7 +258,7 @@ bool JniCallingConvention::IsCurrentParamAFloatOrDouble() { &return_value)) { return return_value; } else { - int arg_pos = GetIteratorPositionWithinShorty(); + size_t arg_pos = GetIteratorPositionWithinShorty(); return IsParamAFloatOrDouble(arg_pos); } } @@ -256,7 +272,7 @@ bool JniCallingConvention::IsCurrentParamADouble() { &return_value)) { return return_value; } else { - int arg_pos = GetIteratorPositionWithinShorty(); + size_t arg_pos = GetIteratorPositionWithinShorty(); return IsParamADouble(arg_pos); } } @@ -270,7 +286,7 @@ bool JniCallingConvention::IsCurrentParamALong() { &return_value)) { return return_value; } else { - int arg_pos = GetIteratorPositionWithinShorty(); + size_t arg_pos = GetIteratorPositionWithinShorty(); return IsParamALong(arg_pos); } } @@ -279,8 +295,9 @@ size_t JniCallingConvention::CurrentParamSize() const { if (IsCurrentArgExtraForJni()) { return static_cast<size_t>(frame_pointer_size_); // JNIEnv or jobject/jclass } else { - int arg_pos = GetIteratorPositionWithinShorty(); - return ParamSize(arg_pos); + size_t arg_pos = GetIteratorPositionWithinShorty(); + // 
References are converted to `jobject` for the native call. Pass `frame_pointer_size_`. + return ParamSize(arg_pos, /*reference_size=*/ static_cast<size_t>(frame_pointer_size_)); } } @@ -305,7 +322,7 @@ bool JniCallingConvention::HasSelfClass() const { } } -unsigned int JniCallingConvention::GetIteratorPositionWithinShorty() const { +size_t JniCallingConvention::GetIteratorPositionWithinShorty() const { // We need to subtract out the extra JNI arguments if we want to use this iterator position // with the inherited CallingConvention member functions, which rely on scanning the shorty. // Note that our shorty does *not* include the JNIEnv, jclass/jobject parameters. diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h index 0187b14256..2657e943e6 100644 --- a/compiler/jni/quick/calling_convention.h +++ b/compiler/jni/quick/calling_convention.h @@ -178,14 +178,18 @@ class CallingConvention : public DeletableArenaObject<kArenaAllocCallingConventi size_t NumReferenceArgs() const { return num_ref_args_; } - size_t ParamSize(unsigned int param) const { + size_t ParamSize(size_t param, size_t reference_size) const { DCHECK_LT(param, NumArgs()); if (IsStatic()) { param++; // 0th argument must skip return value at start of the shorty } else if (param == 0) { - return sizeof(mirror::HeapReference<mirror::Object>); // this argument + return reference_size; // this argument } - size_t result = Primitive::ComponentSize(Primitive::GetType(shorty_[param])); + Primitive::Type type = Primitive::GetType(shorty_[param]); + if (type == Primitive::kPrimNot) { + return reference_size; + } + size_t result = Primitive::ComponentSize(type); if (result >= 1 && result < 4) { result = 4; } @@ -344,7 +348,7 @@ class JniCallingConvention : public CallingConvention { return IsCurrentParamALong() || IsCurrentParamADouble(); } bool IsCurrentParamJniEnv(); - size_t CurrentParamSize() const; + virtual size_t CurrentParamSize() const; virtual bool IsCurrentParamInRegister() = 0; virtual bool IsCurrentParamOnStack() = 0; virtual ManagedRegister CurrentParamRegister() = 0; @@ -432,7 +436,7 @@ class JniCallingConvention : public CallingConvention { bool HasSelfClass() const; // Returns the position of itr_args_, fixed up by removing the offset of extra JNI arguments. - unsigned int GetIteratorPositionWithinShorty() const; + size_t GetIteratorPositionWithinShorty() const; // Is the current argument (at the iterator) an extra argument for JNI? 
bool IsCurrentArgExtraForJni() const; diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc index c60d97467e..9349d2c9fd 100644 --- a/compiler/jni/quick/jni_compiler.cc +++ b/compiler/jni/quick/jni_compiler.cc @@ -154,11 +154,11 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp // -- Don't allow any objects as parameter or return value if (UNLIKELY(is_critical_native)) { CHECK(is_static) - << "@CriticalNative functions cannot be virtual since that would" + << "@CriticalNative functions cannot be virtual since that would " << "require passing a reference parameter (this), which is illegal " << dex_file.PrettyMethod(method_idx, /* with_signature= */ true); CHECK(!is_synchronized) - << "@CriticalNative functions cannot be synchronized since that would" + << "@CriticalNative functions cannot be synchronized since that would " << "require passing a (class and/or this) reference parameter, which is illegal " << dex_file.PrettyMethod(method_idx, /* with_signature= */ true); for (size_t i = 0; i < strlen(shorty); ++i) { @@ -387,8 +387,8 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp DCHECK(main_jni_conv->HasNext()); static_assert(kObjectReferenceSize == 4u); bool is_reference = mr_conv->IsCurrentParamAReference(); - size_t src_size = (!is_reference && mr_conv->IsCurrentParamALongOrDouble()) ? 8u : 4u; - size_t dest_size = is_reference ? kRawPointerSize : src_size; + size_t src_size = mr_conv->CurrentParamSize(); + size_t dest_size = main_jni_conv->CurrentParamSize(); src_args.push_back(mr_conv->IsCurrentParamInRegister() ? ArgumentLocation(mr_conv->CurrentParamRegister(), src_size) : ArgumentLocation(mr_conv->CurrentParamStackOffset(), src_size)); @@ -621,7 +621,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp main_jni_conv->CalleeSaveScratchRegisters()[0], kObjectReferenceSize); // Load the declaring class reference. DCHECK_EQ(ArtMethod::DeclaringClassOffset().SizeValue(), 0u); - __ Load(temp, method_register, MemberOffset(0u), kObjectReferenceSize); + __ LoadGcRootWithoutReadBarrier(temp, method_register, MemberOffset(0u)); // Return to main path if the class object is marked. __ TestMarkBit(temp, jclass_read_barrier_return.get(), JNIMacroUnaryCondition::kNotZero); } @@ -724,7 +724,7 @@ static JniCompiledMethod ArtJniCompileMethodInternal(const CompilerOptions& comp size_t cs = __ CodeSize(); std::vector<uint8_t> managed_code(cs); MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); + __ CopyInstructions(code); return JniCompiledMethod(instruction_set, std::move(managed_code), diff --git a/compiler/jni/quick/riscv64/calling_convention_riscv64.cc b/compiler/jni/quick/riscv64/calling_convention_riscv64.cc new file mode 100644 index 0000000000..b083fec14a --- /dev/null +++ b/compiler/jni/quick/riscv64/calling_convention_riscv64.cc @@ -0,0 +1,429 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "calling_convention_riscv64.h" + +#include <android-base/logging.h> + +#include "arch/instruction_set.h" +#include "arch/riscv64/jni_frame_riscv64.h" +#include "utils/riscv64/managed_register_riscv64.h" + +namespace art HIDDEN { +namespace riscv64 { + +static constexpr ManagedRegister kXArgumentRegisters[] = { + Riscv64ManagedRegister::FromXRegister(A0), + Riscv64ManagedRegister::FromXRegister(A1), + Riscv64ManagedRegister::FromXRegister(A2), + Riscv64ManagedRegister::FromXRegister(A3), + Riscv64ManagedRegister::FromXRegister(A4), + Riscv64ManagedRegister::FromXRegister(A5), + Riscv64ManagedRegister::FromXRegister(A6), + Riscv64ManagedRegister::FromXRegister(A7), +}; +static_assert(kMaxIntLikeArgumentRegisters == arraysize(kXArgumentRegisters)); + +static const FRegister kFArgumentRegisters[] = { + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 +}; +static_assert(kMaxFloatOrDoubleArgumentRegisters == arraysize(kFArgumentRegisters)); + +static constexpr ManagedRegister kCalleeSaveRegisters[] = { + // Core registers. + Riscv64ManagedRegister::FromXRegister(S0), + // ART thread register (TR = S1) is not saved on the stack. + Riscv64ManagedRegister::FromXRegister(S2), + Riscv64ManagedRegister::FromXRegister(S3), + Riscv64ManagedRegister::FromXRegister(S4), + Riscv64ManagedRegister::FromXRegister(S5), + Riscv64ManagedRegister::FromXRegister(S6), + Riscv64ManagedRegister::FromXRegister(S7), + Riscv64ManagedRegister::FromXRegister(S8), + Riscv64ManagedRegister::FromXRegister(S9), + Riscv64ManagedRegister::FromXRegister(S10), + Riscv64ManagedRegister::FromXRegister(S11), + Riscv64ManagedRegister::FromXRegister(RA), + + // Hard float registers. + Riscv64ManagedRegister::FromFRegister(FS0), + Riscv64ManagedRegister::FromFRegister(FS1), + Riscv64ManagedRegister::FromFRegister(FS2), + Riscv64ManagedRegister::FromFRegister(FS3), + Riscv64ManagedRegister::FromFRegister(FS4), + Riscv64ManagedRegister::FromFRegister(FS5), + Riscv64ManagedRegister::FromFRegister(FS6), + Riscv64ManagedRegister::FromFRegister(FS7), + Riscv64ManagedRegister::FromFRegister(FS8), + Riscv64ManagedRegister::FromFRegister(FS9), + Riscv64ManagedRegister::FromFRegister(FS10), + Riscv64ManagedRegister::FromFRegister(FS11), +}; + +template <size_t size> +static constexpr uint32_t CalculateCoreCalleeSpillMask( + const ManagedRegister (&callee_saves)[size]) { + uint32_t result = 0u; + for (auto&& r : callee_saves) { + if (r.AsRiscv64().IsXRegister()) { + result |= (1u << r.AsRiscv64().AsXRegister()); + } + } + return result; +} + +template <size_t size> +static constexpr uint32_t CalculateFpCalleeSpillMask(const ManagedRegister (&callee_saves)[size]) { + uint32_t result = 0u; + for (auto&& r : callee_saves) { + if (r.AsRiscv64().IsFRegister()) { + result |= (1u << r.AsRiscv64().AsFRegister()); + } + } + return result; +} + +static constexpr uint32_t kCoreCalleeSpillMask = CalculateCoreCalleeSpillMask(kCalleeSaveRegisters); +static constexpr uint32_t kFpCalleeSpillMask = CalculateFpCalleeSpillMask(kCalleeSaveRegisters); + +static constexpr ManagedRegister kNativeCalleeSaveRegisters[] = { + // Core registers. 
+ Riscv64ManagedRegister::FromXRegister(S0), + Riscv64ManagedRegister::FromXRegister(S1), + Riscv64ManagedRegister::FromXRegister(S2), + Riscv64ManagedRegister::FromXRegister(S3), + Riscv64ManagedRegister::FromXRegister(S4), + Riscv64ManagedRegister::FromXRegister(S5), + Riscv64ManagedRegister::FromXRegister(S6), + Riscv64ManagedRegister::FromXRegister(S7), + Riscv64ManagedRegister::FromXRegister(S8), + Riscv64ManagedRegister::FromXRegister(S9), + Riscv64ManagedRegister::FromXRegister(S10), + Riscv64ManagedRegister::FromXRegister(S11), + Riscv64ManagedRegister::FromXRegister(RA), + + // Hard float registers. + Riscv64ManagedRegister::FromFRegister(FS0), + Riscv64ManagedRegister::FromFRegister(FS1), + Riscv64ManagedRegister::FromFRegister(FS2), + Riscv64ManagedRegister::FromFRegister(FS3), + Riscv64ManagedRegister::FromFRegister(FS4), + Riscv64ManagedRegister::FromFRegister(FS5), + Riscv64ManagedRegister::FromFRegister(FS6), + Riscv64ManagedRegister::FromFRegister(FS7), + Riscv64ManagedRegister::FromFRegister(FS8), + Riscv64ManagedRegister::FromFRegister(FS9), + Riscv64ManagedRegister::FromFRegister(FS10), + Riscv64ManagedRegister::FromFRegister(FS11), +}; + +static constexpr uint32_t kNativeCoreCalleeSpillMask = + CalculateCoreCalleeSpillMask(kNativeCalleeSaveRegisters); +static constexpr uint32_t kNativeFpCalleeSpillMask = + CalculateFpCalleeSpillMask(kNativeCalleeSaveRegisters); + +static ManagedRegister ReturnRegisterForShorty(const char* shorty) { + if (shorty[0] == 'F' || shorty[0] == 'D') { + return Riscv64ManagedRegister::FromFRegister(FA0); + } else if (shorty[0] == 'V') { + return Riscv64ManagedRegister::NoRegister(); + } else { + // All other return types use A0. Note that there is no managed type wide enough to use A1/FA1. + return Riscv64ManagedRegister::FromXRegister(A0); + } +} + +// Managed runtime calling convention + +ManagedRegister Riscv64ManagedRuntimeCallingConvention::ReturnRegister() const { + return ReturnRegisterForShorty(GetShorty()); +} + +ManagedRegister Riscv64ManagedRuntimeCallingConvention::MethodRegister() { + return Riscv64ManagedRegister::FromXRegister(A0); +} + +ManagedRegister Riscv64ManagedRuntimeCallingConvention::ArgumentRegisterForMethodExitHook() { + DCHECK(!Riscv64ManagedRegister::FromXRegister(A4).Overlaps(ReturnRegister().AsRiscv64())); + return Riscv64ManagedRegister::FromXRegister(A4); +} + +bool Riscv64ManagedRuntimeCallingConvention::IsCurrentParamInRegister() { + // Note: The managed ABI does not pass FP args in general purpose registers. + // This differs from the native ABI which does that after using all FP arg registers. 
+ if (IsCurrentParamAFloatOrDouble()) { + return itr_float_and_doubles_ < kMaxFloatOrDoubleArgumentRegisters; + } else { + size_t non_fp_arg_number = itr_args_ - itr_float_and_doubles_; + return /* method */ 1u + non_fp_arg_number < kMaxIntLikeArgumentRegisters; + } +} + +bool Riscv64ManagedRuntimeCallingConvention::IsCurrentParamOnStack() { + return !IsCurrentParamInRegister(); +} + +ManagedRegister Riscv64ManagedRuntimeCallingConvention::CurrentParamRegister() { + DCHECK(IsCurrentParamInRegister()); + if (IsCurrentParamAFloatOrDouble()) { + return Riscv64ManagedRegister::FromFRegister(kFArgumentRegisters[itr_float_and_doubles_]); + } else { + size_t non_fp_arg_number = itr_args_ - itr_float_and_doubles_; + return kXArgumentRegisters[/* method */ 1u + non_fp_arg_number]; + } +} + +FrameOffset Riscv64ManagedRuntimeCallingConvention::CurrentParamStackOffset() { + return FrameOffset(displacement_.Int32Value() + // displacement + kFramePointerSize + // Method ref + (itr_slots_ * sizeof(uint32_t))); // offset into in args +} + +// JNI calling convention + +Riscv64JniCallingConvention::Riscv64JniCallingConvention(bool is_static, + bool is_synchronized, + bool is_fast_native, + bool is_critical_native, + const char* shorty) + : JniCallingConvention(is_static, + is_synchronized, + is_fast_native, + is_critical_native, + shorty, + kRiscv64PointerSize) { +} + +ManagedRegister Riscv64JniCallingConvention::ReturnRegister() const { + return ReturnRegisterForShorty(GetShorty()); +} + +ManagedRegister Riscv64JniCallingConvention::IntReturnRegister() const { + return Riscv64ManagedRegister::FromXRegister(A0); +} + +size_t Riscv64JniCallingConvention::FrameSize() const { + if (is_critical_native_) { + CHECK(!SpillsMethod()); + CHECK(!HasLocalReferenceSegmentState()); + return 0u; // There is no managed frame for @CriticalNative. + } + + // Method*, callee save area size, local reference segment state + DCHECK(SpillsMethod()); + size_t method_ptr_size = static_cast<size_t>(kFramePointerSize); + size_t callee_save_area_size = CalleeSaveRegisters().size() * kFramePointerSize; + size_t total_size = method_ptr_size + callee_save_area_size; + + DCHECK(HasLocalReferenceSegmentState()); + // Cookie is saved in one of the spilled registers. + + return RoundUp(total_size, kStackAlignment); +} + +size_t Riscv64JniCallingConvention::OutFrameSize() const { + // Count param args, including JNIEnv* and jclass*. + size_t all_args = NumberOfExtraArgumentsForJni() + NumArgs(); + size_t num_fp_args = NumFloatOrDoubleArgs(); + DCHECK_GE(all_args, num_fp_args); + size_t num_non_fp_args = all_args - num_fp_args; + // The size of outgoing arguments. + size_t size = GetNativeOutArgsSize(num_fp_args, num_non_fp_args); + + // @CriticalNative can use tail call as all managed callee saves are preserved by AAPCS64. + static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u); + static_assert((kFpCalleeSpillMask & ~kNativeFpCalleeSpillMask) == 0u); + + // For @CriticalNative, we can make a tail call if there are no stack args. + // Otherwise, add space for return PC. + // Note: Result does not neeed to be zero- or sign-extended. + DCHECK(!RequiresSmallResultTypeExtension()); + if (is_critical_native_ && size != 0u) { + size += kFramePointerSize; // We need to spill RA with the args. 
+ } + size_t out_args_size = RoundUp(size, kNativeStackAlignment); + if (UNLIKELY(IsCriticalNative())) { + DCHECK_EQ(out_args_size, GetCriticalNativeStubFrameSize(GetShorty(), NumArgs() + 1u)); + } + return out_args_size; +} + +ArrayRef<const ManagedRegister> Riscv64JniCallingConvention::CalleeSaveRegisters() const { + if (UNLIKELY(IsCriticalNative())) { + if (UseTailCall()) { + return ArrayRef<const ManagedRegister>(); // Do not spill anything. + } else { + // Spill RA with out args. + static_assert((kCoreCalleeSpillMask & (1 << RA)) != 0u); // Contains RA. + constexpr size_t ra_index = POPCOUNT(kCoreCalleeSpillMask) - 1u; + static_assert(kCalleeSaveRegisters[ra_index].Equals( + Riscv64ManagedRegister::FromXRegister(RA))); + return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters).SubArray( + /*pos=*/ ra_index, /*length=*/ 1u); + } + } else { + return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters); + } +} + +ArrayRef<const ManagedRegister> Riscv64JniCallingConvention::CalleeSaveScratchRegisters() const { + DCHECK(!IsCriticalNative()); + // Use S3-S11 from managed callee saves. All these registers are also native callee saves. + constexpr size_t kStart = 2u; + constexpr size_t kLength = 9u; + static_assert(kCalleeSaveRegisters[kStart].Equals(Riscv64ManagedRegister::FromXRegister(S3))); + static_assert(kCalleeSaveRegisters[kStart + kLength - 1u].Equals( + Riscv64ManagedRegister::FromXRegister(S11))); + static_assert((kCoreCalleeSpillMask & ~kNativeCoreCalleeSpillMask) == 0u); + return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters).SubArray(kStart, kLength); +} + +ArrayRef<const ManagedRegister> Riscv64JniCallingConvention::ArgumentScratchRegisters() const { + DCHECK(!IsCriticalNative()); + // Exclude A0 if it's used as a return register. + static_assert(kXArgumentRegisters[0].Equals(Riscv64ManagedRegister::FromXRegister(A0))); + ArrayRef<const ManagedRegister> scratch_regs(kXArgumentRegisters); + Riscv64ManagedRegister return_reg = ReturnRegister().AsRiscv64(); + auto return_reg_overlaps = [return_reg](ManagedRegister reg) { + return return_reg.Overlaps(reg.AsRiscv64()); + }; + if (return_reg_overlaps(scratch_regs[0])) { + scratch_regs = scratch_regs.SubArray(/*pos=*/ 1u); + } + DCHECK(std::none_of(scratch_regs.begin(), scratch_regs.end(), return_reg_overlaps)); + return scratch_regs; +} + +uint32_t Riscv64JniCallingConvention::CoreSpillMask() const { + return is_critical_native_ ? 0u : kCoreCalleeSpillMask; +} + +uint32_t Riscv64JniCallingConvention::FpSpillMask() const { + return is_critical_native_ ? 0u : kFpCalleeSpillMask; +} + +size_t Riscv64JniCallingConvention::CurrentParamSize() const { + if (IsCurrentArgExtraForJni()) { + return static_cast<size_t>(frame_pointer_size_); // JNIEnv or jobject/jclass + } else { + size_t arg_pos = GetIteratorPositionWithinShorty(); + DCHECK_LT(arg_pos, NumArgs()); + if (IsStatic()) { + ++arg_pos; // 0th argument must skip return value at start of the shorty + } else if (arg_pos == 0) { + return static_cast<size_t>(kRiscv64PointerSize); // this argument + } + // The riscv64 native calling convention specifies that integers narrower than XLEN (64) + // bits are "widened according to the sign of their type up to 32 bits, then sign-extended + // to XLEN bits." Thus, everything other than `float` (which has the high 32 bits undefined) + // is passed as 64 bits, whether in register, or on the stack. + return (GetShorty()[arg_pos] == 'F') ? 
4u : static_cast<size_t>(kRiscv64PointerSize); + } +} + +bool Riscv64JniCallingConvention::IsCurrentParamInRegister() { + // FP args use FPRs, then GPRs and only then the stack. + if (itr_float_and_doubles_ < kMaxFloatOrDoubleArgumentRegisters) { + if (IsCurrentParamAFloatOrDouble()) { + return true; + } else { + size_t num_non_fp_args = itr_args_ - itr_float_and_doubles_; + return num_non_fp_args < kMaxIntLikeArgumentRegisters; + } + } else { + return (itr_args_ < kMaxFloatOrDoubleArgumentRegisters + kMaxIntLikeArgumentRegisters); + } +} + +bool Riscv64JniCallingConvention::IsCurrentParamOnStack() { + return !IsCurrentParamInRegister(); +} + +ManagedRegister Riscv64JniCallingConvention::CurrentParamRegister() { + // FP args use FPRs, then GPRs and only then the stack. + CHECK(IsCurrentParamInRegister()); + if (itr_float_and_doubles_ < kMaxFloatOrDoubleArgumentRegisters) { + if (IsCurrentParamAFloatOrDouble()) { + return Riscv64ManagedRegister::FromFRegister(kFArgumentRegisters[itr_float_and_doubles_]); + } else { + size_t num_non_fp_args = itr_args_ - itr_float_and_doubles_; + DCHECK_LT(num_non_fp_args, kMaxIntLikeArgumentRegisters); + return kXArgumentRegisters[num_non_fp_args]; + } + } else { + // This argument is in a GPR, whether it's a FP arg or a non-FP arg. + DCHECK_LT(itr_args_, kMaxFloatOrDoubleArgumentRegisters + kMaxIntLikeArgumentRegisters); + return kXArgumentRegisters[itr_args_ - kMaxFloatOrDoubleArgumentRegisters]; + } +} + +FrameOffset Riscv64JniCallingConvention::CurrentParamStackOffset() { + CHECK(IsCurrentParamOnStack()); + // Account for FP arguments passed through FA0-FA7. + // All other args are passed through A0-A7 (even FP args) and the stack. + size_t num_gpr_and_stack_args = + itr_args_ - std::min<size_t>(kMaxFloatOrDoubleArgumentRegisters, itr_float_and_doubles_); + size_t args_on_stack = + num_gpr_and_stack_args - std::min(kMaxIntLikeArgumentRegisters, num_gpr_and_stack_args); + size_t offset = displacement_.Int32Value() - OutFrameSize() + (args_on_stack * kFramePointerSize); + CHECK_LT(offset, OutFrameSize()); + return FrameOffset(offset); +} + +bool Riscv64JniCallingConvention::RequiresSmallResultTypeExtension() const { + // RISC-V native calling convention requires values to be returned the way that the first + // argument would be passed. Arguments are zero-/sign-extended to 32 bits based on their + // type, then sign-extended to 64 bits. This is the same as in the ART mamaged ABI. + // (Not applicable to FP args which are returned in `FA0`. A `float` is NaN-boxed.) + return false; +} + +// T0 is neither managed callee-save, nor argument register. It is suitable for use as the +// locking argument for synchronized methods and hidden argument for @CriticalNative methods. +static void AssertT0IsNeitherCalleeSaveNorArgumentRegister() { + // TODO: Change to static_assert; std::none_of should be constexpr since C++20. 
+ DCHECK(std::none_of(kCalleeSaveRegisters, + kCalleeSaveRegisters + std::size(kCalleeSaveRegisters), + [](ManagedRegister callee_save) constexpr { + return callee_save.Equals(Riscv64ManagedRegister::FromXRegister(T0)); + })); + DCHECK(std::none_of(kXArgumentRegisters, + kXArgumentRegisters + std::size(kXArgumentRegisters), + [](ManagedRegister arg) { return arg.AsRiscv64().AsXRegister() == T0; })); +} + +ManagedRegister Riscv64JniCallingConvention::LockingArgumentRegister() const { + DCHECK(!IsFastNative()); + DCHECK(!IsCriticalNative()); + DCHECK(IsSynchronized()); + AssertT0IsNeitherCalleeSaveNorArgumentRegister(); + return Riscv64ManagedRegister::FromXRegister(T0); +} + +ManagedRegister Riscv64JniCallingConvention::HiddenArgumentRegister() const { + DCHECK(IsCriticalNative()); + AssertT0IsNeitherCalleeSaveNorArgumentRegister(); + return Riscv64ManagedRegister::FromXRegister(T0); +} + +// Whether to use tail call (used only for @CriticalNative). +bool Riscv64JniCallingConvention::UseTailCall() const { + CHECK(IsCriticalNative()); + return OutFrameSize() == 0u; +} + +} // namespace riscv64 +} // namespace art diff --git a/compiler/jni/quick/riscv64/calling_convention_riscv64.h b/compiler/jni/quick/riscv64/calling_convention_riscv64.h new file mode 100644 index 0000000000..5add183f72 --- /dev/null +++ b/compiler/jni/quick/riscv64/calling_convention_riscv64.h @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_JNI_QUICK_RISCV64_CALLING_CONVENTION_RISCV64_H_ +#define ART_COMPILER_JNI_QUICK_RISCV64_CALLING_CONVENTION_RISCV64_H_ + +#include "base/enums.h" +#include "base/macros.h" +#include "jni/quick/calling_convention.h" + +namespace art HIDDEN { +namespace riscv64 { + +class Riscv64ManagedRuntimeCallingConvention final : public ManagedRuntimeCallingConvention { + public: + Riscv64ManagedRuntimeCallingConvention(bool is_static, bool is_synchronized, const char* shorty) + : ManagedRuntimeCallingConvention(is_static, + is_synchronized, + shorty, + PointerSize::k64) {} + ~Riscv64ManagedRuntimeCallingConvention() override {} + // Calling convention + ManagedRegister ReturnRegister() const override; + // Managed runtime calling convention + ManagedRegister MethodRegister() override; + ManagedRegister ArgumentRegisterForMethodExitHook() override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; + + private: + DISALLOW_COPY_AND_ASSIGN(Riscv64ManagedRuntimeCallingConvention); +}; + +class Riscv64JniCallingConvention final : public JniCallingConvention { + public: + Riscv64JniCallingConvention(bool is_static, + bool is_synchronized, + bool is_fast_native, + bool is_critical_native, + const char* shorty); + ~Riscv64JniCallingConvention() override {} + // Calling convention + ManagedRegister ReturnRegister() const override; + ManagedRegister IntReturnRegister() const override; + // JNI calling convention + size_t FrameSize() const override; + size_t OutFrameSize() const override; + ArrayRef<const ManagedRegister> CalleeSaveRegisters() const override; + ArrayRef<const ManagedRegister> CalleeSaveScratchRegisters() const override; + ArrayRef<const ManagedRegister> ArgumentScratchRegisters() const override; + uint32_t CoreSpillMask() const override; + uint32_t FpSpillMask() const override; + size_t CurrentParamSize() const override; + bool IsCurrentParamInRegister() override; + bool IsCurrentParamOnStack() override; + ManagedRegister CurrentParamRegister() override; + FrameOffset CurrentParamStackOffset() override; + bool RequiresSmallResultTypeExtension() const override; + + // Locking argument register, used to pass the synchronization object for calls + // to `JniLockObject()` and `JniUnlockObject()`. + ManagedRegister LockingArgumentRegister() const override; + + // Hidden argument register, used to pass the method pointer for @CriticalNative call. + ManagedRegister HiddenArgumentRegister() const override; + + // Whether to use tail call (used only for @CriticalNative). 
+ bool UseTailCall() const override; + + private: + DISALLOW_COPY_AND_ASSIGN(Riscv64JniCallingConvention); +}; + +} // namespace riscv64 +} // namespace art + +#endif // ART_COMPILER_JNI_QUICK_RISCV64_CALLING_CONVENTION_RISCV64_H_ diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc index 9d0761d2f7..0f981dd6df 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc @@ -117,7 +117,7 @@ ArrayRef<const ManagedRegister> X86_64JniCallingConvention::ArgumentScratchRegis return scratch_regs; } -static ManagedRegister ReturnRegisterForShorty(const char* shorty, bool jni ATTRIBUTE_UNUSED) { +static ManagedRegister ReturnRegisterForShorty(const char* shorty, [[maybe_unused]] bool jni) { if (shorty[0] == 'F' || shorty[0] == 'D') { return X86_64ManagedRegister::FromXmmRegister(XMM0); } else if (shorty[0] == 'J') { diff --git a/compiler/linker/output_stream_test.cc b/compiler/linker/output_stream_test.cc index 22b174fce6..6b62874643 100644 --- a/compiler/linker/output_stream_test.cc +++ b/compiler/linker/output_stream_test.cc @@ -107,13 +107,13 @@ TEST_F(OutputStreamTest, BufferedFlush) { flush_called(false) { } ~CheckingOutputStream() override {} - bool WriteFully(const void* buffer ATTRIBUTE_UNUSED, - size_t byte_count ATTRIBUTE_UNUSED) override { + bool WriteFully([[maybe_unused]] const void* buffer, + [[maybe_unused]] size_t byte_count) override { LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); } - off_t Seek(off_t offset ATTRIBUTE_UNUSED, Whence whence ATTRIBUTE_UNUSED) override { + off_t Seek([[maybe_unused]] off_t offset, [[maybe_unused]] Whence whence) override { LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); } diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc index 703584c537..9da2bfb8ef 100644 --- a/compiler/optimizing/block_builder.cc +++ b/compiler/optimizing/block_builder.cc @@ -20,7 +20,6 @@ #include "dex/bytecode_utils.h" #include "dex/code_item_accessors-inl.h" #include "dex/dex_file_exception_helpers.h" -#include "quicken_info.h" namespace art HIDDEN { @@ -40,9 +39,7 @@ HBasicBlockBuilder::HBasicBlockBuilder(HGraph* graph, local_allocator->Adapter(kArenaAllocGraphBuilder)), throwing_blocks_(kDefaultNumberOfThrowingBlocks, local_allocator->Adapter(kArenaAllocGraphBuilder)), - number_of_branches_(0u), - quicken_index_for_dex_pc_(std::less<uint32_t>(), - local_allocator->Adapter(kArenaAllocGraphBuilder)) {} + number_of_branches_(0u) {} HBasicBlock* HBasicBlockBuilder::MaybeCreateBlockAt(uint32_t dex_pc) { return MaybeCreateBlockAt(dex_pc, dex_pc); @@ -147,7 +144,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() { HBasicBlock* block = graph_->GetEntryBlock(); graph_->AddBlock(block); - size_t quicken_index = 0; bool is_throwing_block = false; // Calculate the qucikening index here instead of CreateBranchTargets since it's easier to // calculate in dex_pc order. @@ -158,8 +154,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() { // Check if this dex_pc address starts a new basic block. HBasicBlock* next_block = GetBlockAt(dex_pc); if (next_block != nullptr) { - // We only need quicken index entries for basic block boundaries. - quicken_index_for_dex_pc_.Put(dex_pc, quicken_index); if (block != nullptr) { // Last instruction did not end its basic block but a new one starts here. // It must have been a block falling through into the next one. 
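A recurring mechanical change throughout this patch set (first visible in calling_convention_x86_64.cc and output_stream_test.cc above) replaces ART's ATTRIBUTE_UNUSED macro, written after the parameter name, with the standard C++17 [[maybe_unused]] attribute written before it. A minimal sketch of the two spellings side by side; the function names and the macro definition are assumptions for illustration only (the real macro lives in ART's base/macros.h):

// Old spelling: compiler-specific macro trailing the parameter name.
#define ATTRIBUTE_UNUSED __attribute__((__unused__))  // assumed definition for this sketch
static bool WriteFullyOld(const void* buffer ATTRIBUTE_UNUSED,
                          size_t byte_count ATTRIBUTE_UNUSED) {
  return false;
}

// New spelling: standard C++17 attribute preceding the parameter.
static bool WriteFullyNew([[maybe_unused]] const void* buffer,
                          [[maybe_unused]] size_t byte_count) {
  return false;
}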
@@ -169,10 +163,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() { is_throwing_block = false; graph_->AddBlock(block); } - // Make sure to increment this before the continues. - if (QuickenInfoTable::NeedsIndexForInstruction(&instruction)) { - ++quicken_index; - } if (block == nullptr) { // Ignore dead code. @@ -483,8 +473,4 @@ void HBasicBlockBuilder::BuildIntrinsic() { body->AddSuccessor(exit_block); } -size_t HBasicBlockBuilder::GetQuickenIndex(uint32_t dex_pc) const { - return quicken_index_for_dex_pc_.Get(dex_pc); -} - } // namespace art diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h index 8668ef8221..1aa9375e5a 100644 --- a/compiler/optimizing/block_builder.h +++ b/compiler/optimizing/block_builder.h @@ -45,8 +45,6 @@ class HBasicBlockBuilder : public ValueObject { size_t GetNumberOfBranches() const { return number_of_branches_; } HBasicBlock* GetBlockAt(uint32_t dex_pc) const { return branch_targets_[dex_pc]; } - size_t GetQuickenIndex(uint32_t dex_pc) const; - private: // Creates a basic block starting at given `dex_pc`. HBasicBlock* MaybeCreateBlockAt(uint32_t dex_pc); @@ -83,9 +81,6 @@ class HBasicBlockBuilder : public ValueObject { ScopedArenaVector<HBasicBlock*> throwing_blocks_; size_t number_of_branches_; - // A table to quickly find the quicken index for the first instruction of a basic block. - ScopedArenaSafeMap<uint32_t, uint32_t> quicken_index_for_dex_pc_; - static constexpr size_t kDefaultNumberOfThrowingBlocks = 2u; DISALLOW_COPY_AND_ASSIGN(HBasicBlockBuilder); diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 919abfdc49..c0d4c37659 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -1047,14 +1047,14 @@ class BCEVisitor final : public HGraphVisitor { HDiv* div = nullptr; int64_t const_divisor = 0; - if (HMul* mul = instruction->GetRight()->AsMul()) { + if (HMul* mul = instruction->GetRight()->AsMulOrNull()) { if (!mul->GetLeft()->IsDiv() || !mul->GetRight()->IsConstant()) { return false; } div = mul->GetLeft()->AsDiv(); const_divisor = Int64FromConstant(mul->GetRight()->AsConstant()); - } else if (HAdd* add = instruction->GetRight()->AsAdd()) { - HShl* shl = add->GetRight()->AsShl(); + } else if (HAdd* add = instruction->GetRight()->AsAddOrNull()) { + HShl* shl = add->GetRight()->AsShlOrNull(); if (!is_needed_shl(shl)) { return false; } @@ -1070,8 +1070,8 @@ class BCEVisitor final : public HGraphVisitor { return false; } const_divisor = (1LL << n) + 1; - } else if (HSub* sub = instruction->GetRight()->AsSub()) { - HShl* shl = sub->GetLeft()->AsShl(); + } else if (HSub* sub = instruction->GetRight()->AsSubOrNull()) { + HShl* shl = sub->GetLeft()->AsShlOrNull(); if (!is_needed_shl(shl)) { return false; } @@ -1378,8 +1378,7 @@ class BCEVisitor final : public HGraphVisitor { HInstruction* array_length, HInstruction* base, int32_t min_c, int32_t max_c) { - HBoundsCheck* bounds_check = - first_index_bounds_check_map_.Get(array_length->GetId())->AsBoundsCheck(); + HBoundsCheck* bounds_check = first_index_bounds_check_map_.Get(array_length->GetId()); // Construct deoptimization on single or double bounds on range [base-min_c,base+max_c], // for example either for a[0]..a[3] just 3 or for a[base-1]..a[base+3] both base-1 // and base+3, since we made the assumption any in between value may occur too. 
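The bounds-check-elimination hunks above also switch from the plain As<Kind>() accessors to the new As<Kind>OrNull() forms (AsMulOrNull, AsAddOrNull, AsSubOrNull, AsShlOrNull, AsIntConstantOrNull). Judging from the call sites, the OrNull variant returns nullptr when the instruction is not of the requested kind, while the plain accessor now expects the kind to already hold; this is an inference from the diff, not a statement of the final nodes.h API. A self-contained sketch of that split:

#include <cassert>

struct HMul;

struct HInstruction {
  virtual ~HInstruction() {}
  virtual bool IsMul() const { return false; }
  HMul* AsMulOrNull();   // probe form: nullptr when this is not an HMul
  HMul* AsMul();         // checked form: asserts the kind before downcasting
};

struct HMul : HInstruction {
  bool IsMul() const override { return true; }
};

HMul* HInstruction::AsMulOrNull() {
  return IsMul() ? static_cast<HMul*>(this) : nullptr;
}

HMul* HInstruction::AsMul() {
  assert(IsMul());
  return static_cast<HMul*>(this);
}

// Usage mirroring the rewritten BCE code: probe without asserting.
bool RightOperandIsMul(HInstruction* right) {
  if (HMul* mul = right->AsMulOrNull()) {
    (void)mul;
    return true;
  }
  return false;
}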
diff --git a/compiler/optimizing/code_generation_data.cc b/compiler/optimizing/code_generation_data.cc new file mode 100644 index 0000000000..7b23d46dc5 --- /dev/null +++ b/compiler/optimizing/code_generation_data.cc @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "class_linker.h" +#include "code_generation_data.h" +#include "code_generator.h" +#include "intern_table.h" +#include "mirror/object-inl.h" +#include "runtime.h" + +namespace art HIDDEN { + +void CodeGenerationData::EmitJitRoots( + /*out*/std::vector<Handle<mirror::Object>>* roots) { + DCHECK(roots->empty()); + roots->reserve(GetNumberOfJitRoots()); + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + size_t index = 0; + for (auto& entry : jit_string_roots_) { + // Update the `roots` with the string, and replace the address temporarily + // stored to the index in the table. + uint64_t address = entry.second; + roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); + DCHECK(roots->back() != nullptr); + DCHECK(roots->back()->IsString()); + entry.second = index; + // Ensure the string is strongly interned. This is a requirement on how the JIT + // handles strings. b/32995596 + class_linker->GetInternTable()->InternStrong(roots->back()->AsString()); + ++index; + } + for (auto& entry : jit_class_roots_) { + // Update the `roots` with the class, and replace the address temporarily + // stored to the index in the table. + uint64_t address = entry.second; + roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); + DCHECK(roots->back() != nullptr); + DCHECK(roots->back()->IsClass()); + entry.second = index; + ++index; + } +} + +} // namespace art diff --git a/compiler/optimizing/code_generation_data.h b/compiler/optimizing/code_generation_data.h new file mode 100644 index 0000000000..e78ba8f574 --- /dev/null +++ b/compiler/optimizing/code_generation_data.h @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_ +#define ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_ + +#include <memory> + +#include "arch/instruction_set.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" +#include "code_generator.h" +#include "dex/string_reference.h" +#include "dex/type_reference.h" +#include "handle.h" +#include "mirror/class.h" +#include "mirror/object.h" +#include "mirror/string.h" +#include "stack_map_stream.h" + +namespace art HIDDEN { + +class CodeGenerationData : public DeletableArenaObject<kArenaAllocCodeGenerator> { + public: + static std::unique_ptr<CodeGenerationData> Create(ArenaStack* arena_stack, + InstructionSet instruction_set) { + ScopedArenaAllocator allocator(arena_stack); + void* memory = allocator.Alloc<CodeGenerationData>(kArenaAllocCodeGenerator); + return std::unique_ptr<CodeGenerationData>( + ::new (memory) CodeGenerationData(std::move(allocator), instruction_set)); + } + + ScopedArenaAllocator* GetScopedAllocator() { + return &allocator_; + } + + void AddSlowPath(SlowPathCode* slow_path) { + slow_paths_.emplace_back(std::unique_ptr<SlowPathCode>(slow_path)); + } + + ArrayRef<const std::unique_ptr<SlowPathCode>> GetSlowPaths() const { + return ArrayRef<const std::unique_ptr<SlowPathCode>>(slow_paths_); + } + + StackMapStream* GetStackMapStream() { return &stack_map_stream_; } + + void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string) { + jit_string_roots_.Overwrite(string_reference, + reinterpret_cast64<uint64_t>(string.GetReference())); + } + + uint64_t GetJitStringRootIndex(StringReference string_reference) const { + return jit_string_roots_.Get(string_reference); + } + + size_t GetNumberOfJitStringRoots() const { + return jit_string_roots_.size(); + } + + void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass) { + jit_class_roots_.Overwrite(type_reference, reinterpret_cast64<uint64_t>(klass.GetReference())); + } + + uint64_t GetJitClassRootIndex(TypeReference type_reference) const { + return jit_class_roots_.Get(type_reference); + } + + size_t GetNumberOfJitClassRoots() const { + return jit_class_roots_.size(); + } + + size_t GetNumberOfJitRoots() const { + return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots(); + } + + void EmitJitRoots(/*out*/std::vector<Handle<mirror::Object>>* roots) + REQUIRES_SHARED(Locks::mutator_lock_); + + private: + CodeGenerationData(ScopedArenaAllocator&& allocator, InstructionSet instruction_set) + : allocator_(std::move(allocator)), + stack_map_stream_(&allocator_, instruction_set), + slow_paths_(allocator_.Adapter(kArenaAllocCodeGenerator)), + jit_string_roots_(StringReferenceValueComparator(), + allocator_.Adapter(kArenaAllocCodeGenerator)), + jit_class_roots_(TypeReferenceValueComparator(), + allocator_.Adapter(kArenaAllocCodeGenerator)) { + slow_paths_.reserve(kDefaultSlowPathsCapacity); + } + + static constexpr size_t kDefaultSlowPathsCapacity = 8; + + ScopedArenaAllocator allocator_; + StackMapStream stack_map_stream_; + ScopedArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_; + + // Maps a StringReference (dex_file, string_index) to the index in the literal table. + // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots` + // will compute all the indices. + ScopedArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_; + + // Maps a ClassReference (dex_file, type_index) to the index in the literal table. 
+ // Entries are initially added with a pointer in the handle zone, and `EmitJitRoots` + // will compute all the indices. + ScopedArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_; +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATION_DATA_H_ diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index c9f42b52f5..404a42771f 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -44,6 +44,7 @@ #include "base/leb128.h" #include "class_linker.h" #include "class_root-inl.h" +#include "code_generation_data.h" #include "dex/bytecode_utils.h" #include "dex/code_item_accessors-inl.h" #include "graph_visualizer.h" @@ -141,122 +142,6 @@ static bool CheckTypeConsistency(HInstruction* instruction) { return true; } -class CodeGenerator::CodeGenerationData : public DeletableArenaObject<kArenaAllocCodeGenerator> { - public: - static std::unique_ptr<CodeGenerationData> Create(ArenaStack* arena_stack, - InstructionSet instruction_set) { - ScopedArenaAllocator allocator(arena_stack); - void* memory = allocator.Alloc<CodeGenerationData>(kArenaAllocCodeGenerator); - return std::unique_ptr<CodeGenerationData>( - ::new (memory) CodeGenerationData(std::move(allocator), instruction_set)); - } - - ScopedArenaAllocator* GetScopedAllocator() { - return &allocator_; - } - - void AddSlowPath(SlowPathCode* slow_path) { - slow_paths_.emplace_back(std::unique_ptr<SlowPathCode>(slow_path)); - } - - ArrayRef<const std::unique_ptr<SlowPathCode>> GetSlowPaths() const { - return ArrayRef<const std::unique_ptr<SlowPathCode>>(slow_paths_); - } - - StackMapStream* GetStackMapStream() { return &stack_map_stream_; } - - void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string) { - jit_string_roots_.Overwrite(string_reference, - reinterpret_cast64<uint64_t>(string.GetReference())); - } - - uint64_t GetJitStringRootIndex(StringReference string_reference) const { - return jit_string_roots_.Get(string_reference); - } - - size_t GetNumberOfJitStringRoots() const { - return jit_string_roots_.size(); - } - - void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass) { - jit_class_roots_.Overwrite(type_reference, reinterpret_cast64<uint64_t>(klass.GetReference())); - } - - uint64_t GetJitClassRootIndex(TypeReference type_reference) const { - return jit_class_roots_.Get(type_reference); - } - - size_t GetNumberOfJitClassRoots() const { - return jit_class_roots_.size(); - } - - size_t GetNumberOfJitRoots() const { - return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots(); - } - - void EmitJitRoots(/*out*/std::vector<Handle<mirror::Object>>* roots) - REQUIRES_SHARED(Locks::mutator_lock_); - - private: - CodeGenerationData(ScopedArenaAllocator&& allocator, InstructionSet instruction_set) - : allocator_(std::move(allocator)), - stack_map_stream_(&allocator_, instruction_set), - slow_paths_(allocator_.Adapter(kArenaAllocCodeGenerator)), - jit_string_roots_(StringReferenceValueComparator(), - allocator_.Adapter(kArenaAllocCodeGenerator)), - jit_class_roots_(TypeReferenceValueComparator(), - allocator_.Adapter(kArenaAllocCodeGenerator)) { - slow_paths_.reserve(kDefaultSlowPathsCapacity); - } - - static constexpr size_t kDefaultSlowPathsCapacity = 8; - - ScopedArenaAllocator allocator_; - StackMapStream stack_map_stream_; - ScopedArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_; - - // Maps a StringReference (dex_file, string_index) to the 
index in the literal table. - // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots` - // will compute all the indices. - ScopedArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_; - - // Maps a ClassReference (dex_file, type_index) to the index in the literal table. - // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots` - // will compute all the indices. - ScopedArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_; -}; - -void CodeGenerator::CodeGenerationData::EmitJitRoots( - /*out*/std::vector<Handle<mirror::Object>>* roots) { - DCHECK(roots->empty()); - roots->reserve(GetNumberOfJitRoots()); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - size_t index = 0; - for (auto& entry : jit_string_roots_) { - // Update the `roots` with the string, and replace the address temporarily - // stored to the index in the table. - uint64_t address = entry.second; - roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); - DCHECK(roots->back() != nullptr); - DCHECK(roots->back()->IsString()); - entry.second = index; - // Ensure the string is strongly interned. This is a requirement on how the JIT - // handles strings. b/32995596 - class_linker->GetInternTable()->InternStrong(roots->back()->AsString()); - ++index; - } - for (auto& entry : jit_class_roots_) { - // Update the `roots` with the class, and replace the address temporarily - // stored to the index in the table. - uint64_t address = entry.second; - roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address)); - DCHECK(roots->back() != nullptr); - DCHECK(roots->back()->IsClass()); - entry.second = index; - ++index; - } -} - ScopedArenaAllocator* CodeGenerator::GetScopedAllocator() { DCHECK(code_generation_data_ != nullptr); return code_generation_data_->GetScopedAllocator(); @@ -288,8 +173,8 @@ uint64_t CodeGenerator::GetJitClassRootIndex(TypeReference type_reference) { return code_generation_data_->GetJitClassRootIndex(type_reference); } -void CodeGenerator::EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED, - const uint8_t* roots_data ATTRIBUTE_UNUSED) { +void CodeGenerator::EmitJitRootPatches([[maybe_unused]] uint8_t* code, + [[maybe_unused]] const uint8_t* roots_data) { DCHECK(code_generation_data_ != nullptr); DCHECK_EQ(code_generation_data_->GetNumberOfJitStringRoots(), 0u); DCHECK_EQ(code_generation_data_->GetNumberOfJitClassRoots(), 0u); @@ -378,7 +263,7 @@ void CodeGenerator::InitializeCodeGenerationData() { code_generation_data_ = CodeGenerationData::Create(graph_->GetArenaStack(), GetInstructionSet()); } -void CodeGenerator::Compile(CodeAllocator* allocator) { +void CodeGenerator::Compile() { InitializeCodeGenerationData(); // The register allocator already called `InitializeCodeGeneration`, @@ -394,7 +279,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { fpu_spill_mask_, GetGraph()->GetNumberOfVRegs(), GetGraph()->IsCompilingBaseline(), - GetGraph()->IsDebuggable()); + GetGraph()->IsDebuggable(), + GetGraph()->HasShouldDeoptimizeFlag()); size_t frame_start = GetAssembler()->CodeSize(); GenerateFrameEntry(); @@ -443,32 +329,28 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { } // Finalize instructions in assember; - Finalize(allocator); + Finalize(); GetStackMapStream()->EndMethod(GetAssembler()->CodeSize()); } -void CodeGenerator::Finalize(CodeAllocator* allocator) { - size_t code_size = GetAssembler()->CodeSize(); - 
uint8_t* buffer = allocator->Allocate(code_size); - - MemoryRegion code(buffer, code_size); - GetAssembler()->FinalizeInstructions(code); +void CodeGenerator::Finalize() { + GetAssembler()->FinalizeCode(); } void CodeGenerator::EmitLinkerPatches( - ArenaVector<linker::LinkerPatch>* linker_patches ATTRIBUTE_UNUSED) { + [[maybe_unused]] ArenaVector<linker::LinkerPatch>* linker_patches) { // No linker patches by default. } -bool CodeGenerator::NeedsThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED) const { +bool CodeGenerator::NeedsThunkCode([[maybe_unused]] const linker::LinkerPatch& patch) const { // Code generators that create patches requiring thunk compilation should override this function. return false; } -void CodeGenerator::EmitThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED, - /*out*/ ArenaVector<uint8_t>* code ATTRIBUTE_UNUSED, - /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) { +void CodeGenerator::EmitThunkCode([[maybe_unused]] const linker::LinkerPatch& patch, + [[maybe_unused]] /*out*/ ArenaVector<uint8_t>* code, + [[maybe_unused]] /*out*/ std::string* debug_name) { // Code generators that create patches requiring thunk compilation should override this function. LOG(FATAL) << "Unexpected call to EmitThunkCode()."; } @@ -745,8 +627,8 @@ void CodeGenerator::CreateUnresolvedFieldLocationSummary( locations->SetOut(calling_convention.GetReturnLocation(field_type)); } } else { - size_t set_index = is_instance ? 1 : 0; - if (DataType::IsFloatingPointType(field_type)) { + size_t set_index = is_instance ? 1 : 0; + if (DataType::IsFloatingPointType(field_type)) { // The set value comes from a float location while the calling convention // expects it in a regular register location. Allocate a temp for it and // make the transfer at codegen. @@ -1028,6 +910,12 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph, new (allocator) arm64::CodeGeneratorARM64(graph, compiler_options, stats)); } #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case InstructionSet::kRiscv64: { + return std::unique_ptr<CodeGenerator>( + new (allocator) riscv64::CodeGeneratorRISCV64(graph, compiler_options, stats)); + } +#endif #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { return std::unique_ptr<CodeGenerator>( @@ -1834,8 +1722,8 @@ void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); // The positions must be non-negative. if ((src_pos != nullptr && src_pos->GetValue() < 0) || @@ -1845,7 +1733,7 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { } // The length must be >= 0. 
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (length != nullptr) { int32_t len = length->GetValue(); if (len < 0) { diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 9872efaa4a..cd44fb3fa7 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -59,8 +59,12 @@ static int32_t constexpr kPrimIntMax = 0x7fffffff; // Maximum value for a primitive long. static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff); -static const ReadBarrierOption gCompilerReadBarrierOption = - gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier; +// Depending on configuration, `gUseReadBarrier` can be a static const variable. +// Static variable initialization order across different compilation units is not defined, +// so function is used instead of static variable `gCompilerReadBarrierOption`. +inline ReadBarrierOption GetCompilerReadBarrierOption() { + return gUseReadBarrier ? kWithReadBarrier : kWithoutReadBarrier; +} constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); constexpr size_t status_byte_offset = @@ -73,6 +77,7 @@ constexpr uint32_t shifted_initialized_value = enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); class Assembler; +class CodeGenerationData; class CodeGenerator; class CompilerOptions; class StackMapStream; @@ -82,18 +87,6 @@ namespace linker { class LinkerPatch; } // namespace linker -class CodeAllocator { - public: - CodeAllocator() {} - virtual ~CodeAllocator() {} - - virtual uint8_t* Allocate(size_t size) = 0; - virtual ArrayRef<const uint8_t> GetMemory() const = 0; - - private: - DISALLOW_COPY_AND_ASSIGN(CodeAllocator); -}; - class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> { public: explicit SlowPathCode(HInstruction* instruction) : instruction_(instruction) { @@ -200,7 +193,7 @@ class FieldAccessCallingConvention { class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { public: // Compiles the graph to executable instructions. 
- void Compile(CodeAllocator* allocator); + void Compile(); static std::unique_ptr<CodeGenerator> Create(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); @@ -221,7 +214,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { } virtual void Initialize() = 0; - virtual void Finalize(CodeAllocator* allocator); + virtual void Finalize(); virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches); virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const; virtual void EmitThunkCode(const linker::LinkerPatch& patch, @@ -278,14 +271,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; } - static uint32_t ComputeRegisterMask(const int* registers, size_t length) { - uint32_t mask = 0; - for (size_t i = 0, e = length; i < e; ++i) { - mask |= (1 << registers[i]); - } - return mask; - } - virtual void DumpCoreRegister(std::ostream& stream, int reg) const = 0; virtual void DumpFloatingPointRegister(std::ostream& stream, int reg) const = 0; virtual InstructionSet GetInstructionSet() const = 0; @@ -731,6 +716,11 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array); static ScaleFactor ScaleFactorForType(DataType::Type type); + ArrayRef<const uint8_t> GetCode() const { + return ArrayRef<const uint8_t>(GetAssembler().CodeBufferBaseAddress(), + GetAssembler().CodeSize()); + } + protected: // Patch info used for recording locations of required linker patches and their targets, // i.e. target method, string, type or code identified by their dex file and index, @@ -761,6 +751,15 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual HGraphVisitor* GetLocationBuilder() = 0; virtual HGraphVisitor* GetInstructionVisitor() = 0; + template <typename RegType> + static uint32_t ComputeRegisterMask(const RegType* registers, size_t length) { + uint32_t mask = 0; + for (size_t i = 0, e = length; i < e; ++i) { + mask |= (1 << registers[i]); + } + return mask; + } + // Returns the location of the first spilled entry for floating point registers, // relative to the stack pointer. 
uint32_t GetFpuSpillStart() const { @@ -814,6 +813,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { StackMapStream* GetStackMapStream(); + CodeGenerationData* GetCodeGenerationData() { + return code_generation_data_.get(); + } + void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string); uint64_t GetJitStringRootIndex(StringReference string_reference); void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass); @@ -848,8 +851,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { DisassemblyInformation* disasm_info_; private: - class CodeGenerationData; - void InitializeCodeGenerationData(); size_t GetStackOffsetOfSavedRegister(size_t index); void GenerateSlowPaths(); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 41db9a2542..89172aaebc 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -35,6 +35,7 @@ #include "interpreter/mterp/nterp.h" #include "intrinsics.h" #include "intrinsics_arm64.h" +#include "intrinsics_list.h" #include "intrinsics_utils.h" #include "linker/linker_patch.h" #include "lock_word.h" @@ -45,6 +46,7 @@ #include "optimizing/common_arm64.h" #include "optimizing/nodes.h" #include "thread.h" +#include "trace.h" #include "utils/arm64/assembler_arm64.h" #include "utils/assembler.h" #include "utils/stack_checks.h" @@ -88,6 +90,9 @@ using helpers::VIXLRegCodeFromART; using helpers::WRegisterFrom; using helpers::XRegisterFrom; +// TODO(mythria): Expand SystemRegister in vixl to include this value. +uint16_t SYS_CNTVCT_EL0 = SystemRegisterEncoder<1, 3, 14, 0, 2>::value; + // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump // table version generates 7 instructions and num_entries literals. Compare/jump sequence will // generates less code/data with a small num_entries. @@ -936,6 +941,7 @@ Location CriticalNativeCallingConventionVisitorARM64::GetMethodLocation() const } namespace detail { + // Mark which intrinsics we don't have handcrafted code for. template <Intrinsics T> struct IsUnimplemented { @@ -950,15 +956,13 @@ struct IsUnimplemented { UNIMPLEMENTED_INTRINSIC_LIST_ARM64(TRUE_OVERRIDE) #undef TRUE_OVERRIDE -#include "intrinsics_list.h" static constexpr bool kIsIntrinsicUnimplemented[] = { - false, // kNone + false, // kNone #define IS_UNIMPLEMENTED(Intrinsic, ...) 
\ - IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, - INTRINSICS_LIST(IS_UNIMPLEMENTED) + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) #undef IS_UNIMPLEMENTED }; -#undef INTRINSICS_LIST } // namespace detail @@ -995,14 +999,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - uint32_literals_(std::less<uint32_t>(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - uint64_literals_(std::less<uint64_t>(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - jit_string_patches_(StringReferenceValueComparator(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), - jit_class_patches_(TypeReferenceValueComparator(), - graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_patches_(&assembler_, graph->GetAllocator()), jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { // Save the link register (containing the return address) to mimic Quick. @@ -1036,7 +1033,7 @@ void CodeGeneratorARM64::EmitJumpTables() { } } -void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { +void CodeGeneratorARM64::Finalize() { EmitJumpTables(); // Emit JIT baker read barrier slow paths. @@ -1051,11 +1048,11 @@ void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { // Ensure we emit the literal pool. __ FinalizeCode(); - CodeGenerator::Finalize(allocator); + CodeGenerator::Finalize(); // Verify Baker read barrier linker patches. if (kIsDebugBuild) { - ArrayRef<const uint8_t> code = allocator->GetMemory(); + ArrayRef<const uint8_t> code(GetCode()); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { DCHECK(info.label.IsBound()); uint32_t literal_offset = info.label.GetLocation(); @@ -1192,8 +1189,9 @@ void LocationsBuilderARM64::VisitMethodExitHook(HMethodExitHook* method_hook) { void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* instruction) { MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); - Register temp = temps.AcquireX(); - Register value = temps.AcquireW(); + Register addr = temps.AcquireX(); + Register index = temps.AcquireX(); + Register value = index.W(); SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARM64(instruction); @@ -1213,9 +1211,44 @@ void InstructionCodeGeneratorARM64::GenerateMethodEntryExitHook(HInstruction* in MemberOffset offset = instruction->IsMethodExitHook() ? instrumentation::Instrumentation::HaveMethodExitListenersOffset() : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); - __ Mov(temp, address + offset.Int32Value()); - __ Ldrb(value, MemOperand(temp, 0)); - __ Cbnz(value, slow_path->GetEntryLabel()); + __ Mov(addr, address + offset.Int32Value()); + __ Ldrb(value, MemOperand(addr, 0)); + __ Cmp(value, Operand(instrumentation::Instrumentation::kFastTraceListeners)); + // Check if there are any method entry / exit listeners. If no, continue. + __ B(lt, slow_path->GetExitLabel()); + // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners. + // If yes, just take the slow path. 
+ __ B(gt, slow_path->GetEntryLabel()); + + // Check if there is place in the buffer to store a new entry, if no, take slow path. + uint32_t trace_buffer_index_offset = + Thread::TraceBufferIndexOffset<kArm64PointerSize>().Int32Value(); + __ Ldr(index, MemOperand(tr, trace_buffer_index_offset)); + __ Subs(index, index, kNumEntriesForWallClock); + __ B(lt, slow_path->GetEntryLabel()); + + // Update the index in the `Thread`. + __ Str(index, MemOperand(tr, trace_buffer_index_offset)); + // Calculate the entry address in the buffer. + // addr = base_addr + sizeof(void*) * index; + __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArm64PointerSize>().SizeValue())); + __ ComputeAddress(addr, MemOperand(addr, index, LSL, TIMES_8)); + + Register tmp = index; + // Record method pointer and trace action. + __ Ldr(tmp, MemOperand(sp, 0)); + // Use last two bits to encode trace method action. For MethodEntry it is 0 + // so no need to set the bits since they are 0 already. + if (instruction->IsMethodExitHook()) { + DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4)); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1); + __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit))); + } + __ Str(tmp, MemOperand(addr, kMethodOffsetInBytes)); + // Record the timestamp. + __ Mrs(tmp, (SystemRegister)SYS_CNTVCT_EL0); + __ Str(tmp, MemOperand(addr, kTimestampOffsetInBytes)); __ Bind(slow_path->GetExitLabel()); } @@ -1264,7 +1297,7 @@ void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireX(); Register counter = temps.AcquireW(); - __ Ldr(temp, DeduplicateUint64Literal(address)); + __ Ldr(temp, jit_patches_.DeduplicateUint64Literal(address)); __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); __ Cbz(counter, slow_path->GetEntryLabel()); __ Add(counter, counter, -1); @@ -1532,15 +1565,15 @@ size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_ return kArm64WordSize; } -size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARM64::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { LOG(FATAL) << "FP registers shouldn't be saved/restored individually, " << "use SaveRestoreLiveRegistersHelper"; UNREACHABLE(); } -size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARM64::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { LOG(FATAL) << "FP registers shouldn't be saved/restored individually, " << "use SaveRestoreLiveRegistersHelper"; UNREACHABLE(); @@ -3647,7 +3680,7 @@ void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) { } void InstructionCodeGeneratorARM64::VisitDoubleConstant( - HDoubleConstant* constant ATTRIBUTE_UNUSED) { + [[maybe_unused]] HDoubleConstant* constant) { // Will be generated at use site. 
} @@ -3655,8 +3688,7 @@ void LocationsBuilderARM64::VisitExit(HExit* exit) { exit->SetLocations(nullptr); } -void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} +void InstructionCodeGeneratorARM64::VisitExit([[maybe_unused]] HExit* exit) {} void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) { LocationSummary* locations = @@ -3664,7 +3696,7 @@ void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) { // Will be generated at use site. } @@ -3747,7 +3779,7 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct // The condition instruction has been materialized, compare the output to 0. Location cond_val = instruction->GetLocations()->InAt(condition_input_index); DCHECK(cond_val.IsRegister()); - if (true_target == nullptr) { + if (true_target == nullptr) { __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target); } else { __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target); @@ -3876,7 +3908,7 @@ static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) { } static inline Condition GetConditionForSelect(HCondition* condition) { - IfCondition cond = condition->AsCondition()->GetCondition(); + IfCondition cond = condition->GetCondition(); return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias()) : ARM64Condition(cond); } @@ -3888,8 +3920,8 @@ void LocationsBuilderARM64::VisitSelect(HSelect* select) { locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - HConstant* cst_true_value = select->GetTrueValue()->AsConstant(); - HConstant* cst_false_value = select->GetFalseValue()->AsConstant(); + HConstant* cst_true_value = select->GetTrueValue()->AsConstantOrNull(); + HConstant* cst_false_value = select->GetFalseValue()->AsConstantOrNull(); bool is_true_value_constant = cst_true_value != nullptr; bool is_false_value_constant = cst_false_value != nullptr; // Ask VIXL whether we should synthesize constants in registers. @@ -4308,7 +4340,6 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { } else { locations->SetInAt(1, Location::RequiresRegister()); } - // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -4478,12 +4509,11 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - maybe_temp2_loc, - kWithoutReadBarrier); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset)); // Loop through the iftable and check if any class matches. 
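The GenerateMethodEntryExitHook fast path added in the hunks above avoids the slow path when only "fast" trace listeners are installed: it decrements a per-thread trace-buffer index by kNumEntriesForWallClock, bails out to the slow path if the buffer is full, and otherwise stores the method pointer (with the trace action or-ed into its low bits) and a CNTVCT_EL0 timestamp into the reserved slots. A host-side C++ sketch of that bookkeeping; the struct, field names, and two-word entry layout are assumptions taken from the comments in the generated code, not the runtime's actual Thread layout:

#include <cstdint>
#include <cstddef>

enum class TraceAction : uintptr_t { kTraceMethodEnter = 0, kTraceMethodExit = 1 };

constexpr intptr_t kNumEntriesForWallClock = 2;  // method word + timestamp word (assumed)

struct ThreadTraceBuffer {
  uintptr_t* base;  // analogue of Thread::TraceBufferPtrOffset
  intptr_t index;   // analogue of Thread::TraceBufferIndexOffset; counts down towards 0
};

// Returns false when the buffer has no room, which corresponds to taking the slow path.
bool RecordMethodEvent(ThreadTraceBuffer* buf,
                       uintptr_t method,  // ArtMethod*, at least 4-byte aligned
                       TraceAction action,
                       uint64_t timestamp) {
  intptr_t new_index = buf->index - kNumEntriesForWallClock;
  if (new_index < 0) {
    return false;  // no space left; the slow path handles flushing
  }
  buf->index = new_index;
  uintptr_t* entry = buf->base + new_index;
  // The low two bits of the aligned method pointer are free, so the action fits there;
  // for a method entry the bits are already zero and no or-ing is needed.
  entry[0] = method | static_cast<uintptr_t>(action);
  entry[1] = static_cast<uintptr_t>(timestamp);  // CNTVCT_EL0 in the emitted arm64 code
  return true;
}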
@@ -4525,7 +4555,7 @@ void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) { // Will be generated at use site. } @@ -4534,7 +4564,7 @@ void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) { // Will be generated at use site. } @@ -4709,8 +4739,8 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - ArtMethod* method ATTRIBUTE_UNUSED) { + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, + [[maybe_unused]] ArtMethod* method) { // On ARM64 we support all dispatch types. return desired_dispatch_info; } @@ -4749,7 +4779,8 @@ void CodeGeneratorARM64::LoadMethod(MethodLoadKind load_kind, Location temp, HIn case MethodLoadKind::kJitDirectAddress: { // Load method address from literal pool. __ Ldr(XRegisterFrom(temp), - DeduplicateUint64Literal(reinterpret_cast<uint64_t>(invoke->GetResolvedMethod()))); + jit_patches_.DeduplicateUint64Literal( + reinterpret_cast<uint64_t>(invoke->GetResolvedMethod()))); break; } case MethodLoadKind::kRuntimeCall: { @@ -4775,14 +4806,12 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset)); break; } - case MethodLoadKind::kRecursive: { + case MethodLoadKind::kRecursive: callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex()); break; - } - case MethodLoadKind::kRuntimeCall: { + case MethodLoadKind::kRuntimeCall: GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); return; // No code pointer retrieval; the runtime performs the call directly. - } case MethodLoadKind::kBootImageLinkTimePcRelative: DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) { @@ -4798,10 +4827,9 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( break; } FALLTHROUGH_INTENDED; - default: { + default: LoadMethod(invoke->GetMethodLoadKind(), temp, invoke); break; - } } auto call_lr = [&]() { @@ -4906,6 +4934,7 @@ void CodeGeneratorARM64::GenerateVirtualCall( } // Instead of simply (possibly) unpoisoning `temp` here, we should // emit a read barrier for the previous class reference load. 
+ // However this is not required in practice, as this is an // intermediate/temporary reference and because the current // concurrent copying collector keeps the from-space memory // intact/accessible until the end of the marking phase (the @@ -5090,25 +5119,8 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( return label; } -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral( - uint64_t address) { - return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); -} - -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral( - const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) { - ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); - return jit_string_patches_.GetOrCreate( - StringReference(&dex_file, string_index), - [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); -} - -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral( - const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) { - ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); - return jit_class_patches_.GetOrCreate( - TypeReference(&dex_file, type_index), - [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); +void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { + jit_patches_.EmitJitRootPatches(code, roots_data, *GetCodeGenerationData()); } void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, @@ -5332,19 +5344,7 @@ void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch, assembler.FinalizeCode(); code->resize(assembler.CodeSize()); MemoryRegion code_region(code->data(), code->size()); - assembler.FinalizeInstructions(code_region); -} - -vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) { - return uint32_literals_.GetOrCreate( - value, - [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); }); -} - -vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) { - return uint64_literals_.GetOrCreate( - value, - [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); }); + assembler.CopyInstructions(code_region); } void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { @@ -5370,13 +5370,8 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { return; } - { - // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there - // are no pools emitted. 
- EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); - codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); - DCHECK(!codegen_->IsLeafMethod()); - } + codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); + DCHECK(!codegen_->IsLeafMethod()); codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } @@ -5434,7 +5429,9 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { locations->SetInAt(0, Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister()); - if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage) { if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); @@ -5460,9 +5457,8 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA Location out_loc = cls->GetLocations()->Out(); Register out = OutputRegister(cls); - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : gCompilerReadBarrierOption; + const ReadBarrierOption read_barrier_option = + cls->IsInBootImage() ? kWithoutReadBarrier : GetCompilerReadBarrierOption(); bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -5600,7 +5596,7 @@ void LocationsBuilderARM64::VisitClearException(HClearException* clear) { new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } -void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitClearException([[maybe_unused]] HClearException* clear) { __ Str(wzr, GetExceptionTlsAddress()); } @@ -5685,7 +5681,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD temp, /* offset placeholder */ 0u, ldr_label, - gCompilerReadBarrierOption); + GetCompilerReadBarrierOption()); SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); @@ -5709,14 +5705,13 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD out.X(), /* offset= */ 0, /* fixup_label= */ nullptr, - gCompilerReadBarrierOption); + GetCompilerReadBarrierOption()); return; } default: break; } - // TODO: Re-add the compiler code to do string dex cache lookup again. InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode()); __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_); @@ -5730,7 +5725,7 @@ void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) { // Will be generated at use site. 
} @@ -5930,7 +5925,7 @@ void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) { HandleBinaryOp(instruction); } -void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderARM64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { LOG(FATAL) << "Unreachable"; } @@ -5957,7 +5952,7 @@ void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) { } void InstructionCodeGeneratorARM64::VisitParameterValue( - HParameterValue* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HParameterValue* instruction) { // Nothing to do, the parameter is already at its location. } @@ -5968,7 +5963,7 @@ void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) { } void InstructionCodeGeneratorARM64::VisitCurrentMethod( - HCurrentMethod* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HCurrentMethod* instruction) { // Nothing to do, the method is already at its location. } @@ -5980,7 +5975,7 @@ void LocationsBuilderARM64::VisitPhi(HPhi* instruction) { locations->SetOut(Location::Any()); } -void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitPhi([[maybe_unused]] HPhi* instruction) { LOG(FATAL) << "Unreachable"; } @@ -6175,7 +6170,7 @@ void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor } void InstructionCodeGeneratorARM64::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + [[maybe_unused]] HConstructorFence* constructor_fence) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -6215,7 +6210,7 @@ void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { instruction->SetLocations(nullptr); } -void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitReturnVoid([[maybe_unused]] HReturnVoid* instruction) { codegen_->GenerateFrameExit(); } @@ -6353,6 +6348,9 @@ void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) { // In suspend check slow path, usually there are no caller-save registers at all. // If SIMD instructions are present, however, we force spilling all live SIMD // registers in full width (since the runtime only saves/restores lower part). + // Note that only a suspend check can see live SIMD registers. In the + // loop optimization, we make sure this does not happen for any other slow + // path. locations->SetCustomSlowPathCallerSaves( GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty()); } @@ -6467,12 +6465,12 @@ void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) { HandleBinaryOp(instruction); } -void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARM64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. 
LOG(FATAL) << "Unreachable"; } @@ -7018,32 +7016,6 @@ void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instructi } } -static void PatchJitRootUse(uint8_t* code, - const uint8_t* roots_data, - vixl::aarch64::Literal<uint32_t>* literal, - uint64_t index_in_table) { - uint32_t literal_offset = literal->GetOffset(); - uintptr_t address = - reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); - uint8_t* data = code + literal_offset; - reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address); -} - -void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { - for (const auto& entry : jit_string_patches_) { - const StringReference& string_reference = entry.first; - vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; - uint64_t index_in_table = GetJitStringRootIndex(string_reference); - PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); - } - for (const auto& entry : jit_class_patches_) { - const TypeReference& type_reference = entry.first; - vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; - uint64_t index_in_table = GetJitClassRootIndex(type_reference); - PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); - } -} - MemOperand InstructionCodeGeneratorARM64::VecNEONAddress( HVecMemoryOperation* instruction, UseScratchRegisterScope* temps_scope, diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 6190364d1d..957f85aa21 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -26,6 +26,7 @@ #include "dex/string_reference.h" #include "dex/type_reference.h" #include "driver/compiler_options.h" +#include "jit_patches_arm64.h" #include "nodes.h" #include "parallel_move_resolver.h" #include "utils/arm64/assembler_arm64.h" @@ -50,30 +51,29 @@ class CodeGeneratorARM64; // Use a local definition to prevent copying mistakes. static constexpr size_t kArm64WordSize = static_cast<size_t>(kArm64PointerSize); -// These constants are used as an approximate margin when emission of veneer and literal pools +// This constant is used as an approximate margin when emission of veneer and literal pools // must be blocked. 
static constexpr int kMaxMacroInstructionSizeInBytes = 15 * vixl::aarch64::kInstructionSize; -static constexpr int kInvokeCodeMarginSizeInBytes = 6 * kMaxMacroInstructionSizeInBytes; static const vixl::aarch64::Register kParameterCoreRegisters[] = { - vixl::aarch64::x1, - vixl::aarch64::x2, - vixl::aarch64::x3, - vixl::aarch64::x4, - vixl::aarch64::x5, - vixl::aarch64::x6, - vixl::aarch64::x7 + vixl::aarch64::x1, + vixl::aarch64::x2, + vixl::aarch64::x3, + vixl::aarch64::x4, + vixl::aarch64::x5, + vixl::aarch64::x6, + vixl::aarch64::x7 }; static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); static const vixl::aarch64::VRegister kParameterFPRegisters[] = { - vixl::aarch64::d0, - vixl::aarch64::d1, - vixl::aarch64::d2, - vixl::aarch64::d3, - vixl::aarch64::d4, - vixl::aarch64::d5, - vixl::aarch64::d6, - vixl::aarch64::d7 + vixl::aarch64::d0, + vixl::aarch64::d1, + vixl::aarch64::d2, + vixl::aarch64::d3, + vixl::aarch64::d4, + vixl::aarch64::d5, + vixl::aarch64::d6, + vixl::aarch64::d7 }; static constexpr size_t kParameterFPRegistersLength = arraysize(kParameterFPRegisters); @@ -116,7 +116,7 @@ const vixl::aarch64::CPURegList callee_saved_core_registers( vixl::aarch64::CPURegister::kRegister, vixl::aarch64::kXRegSize, (kReserveMarkingRegister ? vixl::aarch64::x21.GetCode() : vixl::aarch64::x20.GetCode()), - vixl::aarch64::x30.GetCode()); + vixl::aarch64::x30.GetCode()); const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegister::kVRegister, vixl::aarch64::kDRegSize, vixl::aarch64::d8.GetCode(), @@ -192,34 +192,34 @@ class JumpTableARM64 : public DeletableArenaObject<kArenaAllocSwitchTable> { DISALLOW_COPY_AND_ASSIGN(JumpTableARM64); }; -static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] = - { vixl::aarch64::x0, - vixl::aarch64::x1, - vixl::aarch64::x2, - vixl::aarch64::x3, - vixl::aarch64::x4, - vixl::aarch64::x5, - vixl::aarch64::x6, - vixl::aarch64::x7 }; +static const vixl::aarch64::Register kRuntimeParameterCoreRegisters[] = { + vixl::aarch64::x0, + vixl::aarch64::x1, + vixl::aarch64::x2, + vixl::aarch64::x3, + vixl::aarch64::x4, + vixl::aarch64::x5, + vixl::aarch64::x6, + vixl::aarch64::x7 +}; static constexpr size_t kRuntimeParameterCoreRegistersLength = arraysize(kRuntimeParameterCoreRegisters); -static const vixl::aarch64::VRegister kRuntimeParameterFpuRegisters[] = - { vixl::aarch64::d0, - vixl::aarch64::d1, - vixl::aarch64::d2, - vixl::aarch64::d3, - vixl::aarch64::d4, - vixl::aarch64::d5, - vixl::aarch64::d6, - vixl::aarch64::d7 }; +static const vixl::aarch64::VRegister kRuntimeParameterFpuRegisters[] = { + vixl::aarch64::d0, + vixl::aarch64::d1, + vixl::aarch64::d2, + vixl::aarch64::d3, + vixl::aarch64::d4, + vixl::aarch64::d5, + vixl::aarch64::d6, + vixl::aarch64::d7 +}; static constexpr size_t kRuntimeParameterFpuRegistersLength = arraysize(kRuntimeParameterCoreRegisters); class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::Register, vixl::aarch64::VRegister> { public: - static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); - InvokeRuntimeCallingConvention() : CallingConvention(kRuntimeParameterCoreRegisters, kRuntimeParameterCoreRegistersLength, @@ -304,16 +304,16 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention { Location GetFieldIndexLocation() const override { return helpers::LocationFrom(vixl::aarch64::x0); } - Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + 
Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override { return helpers::LocationFrom(vixl::aarch64::x0); } - Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, + Location GetSetValueLocation([[maybe_unused]] DataType::Type type, bool is_instance) const override { return is_instance ? helpers::LocationFrom(vixl::aarch64::x2) : helpers::LocationFrom(vixl::aarch64::x1); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override { return helpers::LocationFrom(vixl::aarch64::d0); } @@ -551,12 +551,31 @@ class InstructionCodeGeneratorARM64Sve : public InstructionCodeGeneratorARM64 { // register size (full SIMD register is used). void ValidateVectorLength(HVecOperation* instr) const; - // Returns default predicate register which is used as governing vector predicate - // to implement predicated loop execution. + vixl::aarch64::PRegister GetVecGoverningPReg(HVecOperation* instr) { + return GetVecPredSetFixedOutPReg(instr->GetGoverningPredicate()); + } + + // Returns a fixed p-reg for a predicate setting instruction. + // + // Currently we only support diamond CF loops for predicated vectorization; also we don't have + // register allocator support for vector predicates. Thus we use fixed P-regs for loop main, + // True and False predicates as a temporary solution. // - // TODO: This is a hack to be addressed when register allocator supports SIMD types. - static vixl::aarch64::PRegister LoopPReg() { - return vixl::aarch64::p0; + // TODO: Support SIMD types and registers in ART. + static vixl::aarch64::PRegister GetVecPredSetFixedOutPReg(HVecPredSetOperation* instr) { + if (instr->IsVecPredWhile() || instr->IsVecPredSetAll()) { + // VecPredWhile and VecPredSetAll live ranges never overlap due to the current vectorization + // scheme: the former is only live inside a vectorized loop and the latter is never in a + // loop and never spans across loops. + return vixl::aarch64::p0; + } else if (instr->IsVecPredNot()) { + // This relies on the fact that we only use PredNot manually in the autovectorizer, + // so there is only one of them in each loop. + return vixl::aarch64::p1; + } else { + DCHECK(instr->IsVecCondition()); + return vixl::aarch64::p2; + } } }; @@ -698,7 +717,7 @@ class CodeGeneratorARM64 : public CodeGenerator { return jump_tables_.back().get(); } - void Finalize(CodeAllocator* allocator) override; + void Finalize() override; // Code generation helpers. void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant); @@ -737,9 +756,7 @@ class CodeGeneratorARM64 : public CodeGenerator { ParallelMoveResolverARM64* GetMoveResolver() override { return &move_resolver_; } - bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { - return false; - } + bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. @@ -838,13 +855,21 @@ class CodeGeneratorARM64 : public CodeGenerator { // the associated patch for AOT or slow path for JIT.
void EmitBakerReadBarrierCbnz(uint32_t custom_data); - vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); + vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address) { + return jit_patches_.DeduplicateBootImageAddressLiteral(address); + } vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index, - Handle<mirror::String> handle); + Handle<mirror::String> handle) { + return jit_patches_.DeduplicateJitStringLiteral( + dex_file, string_index, handle, GetCodeGenerationData()); + } vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral(const DexFile& dex_file, - dex::TypeIndex string_index, - Handle<mirror::Class> handle); + dex::TypeIndex class_index, + Handle<mirror::Class> handle) { + return jit_patches_.DeduplicateJitClassLiteral( + dex_file, class_index, handle, GetCodeGenerationData()); + } void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg); void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label, @@ -1074,18 +1099,6 @@ class CodeGeneratorARM64 : public CodeGenerator { uint32_t encoded_data, /*out*/ std::string* debug_name); - using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>; - using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>; - using StringToLiteralMap = ArenaSafeMap<StringReference, - vixl::aarch64::Literal<uint32_t>*, - StringReferenceValueComparator>; - using TypeToLiteralMap = ArenaSafeMap<TypeReference, - vixl::aarch64::Literal<uint32_t>*, - TypeReferenceValueComparator>; - - vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value); - vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value); - // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. struct PcRelativePatchInfo : PatchInfo<vixl::aarch64::Label> { @@ -1158,14 +1171,7 @@ class CodeGeneratorARM64 : public CodeGenerator { // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; - // Deduplication map for 32-bit literals, used for JIT for boot image addresses. - Uint32ToLiteralMap uint32_literals_; - // Deduplication map for 64-bit literals, used for JIT for method address or method code. - Uint64ToLiteralMap uint64_literals_; - // Patches for string literals in JIT compiled code. - StringToLiteralMap jit_string_patches_; - // Patches for class literals in JIT compiled code. - TypeToLiteralMap jit_class_patches_; + JitPatchesARM64 jit_patches_; // Baker read barrier slow paths, mapping custom data (uint32_t) to label. 
// Wrap the label to work around vixl::aarch64::Label being non-copyable diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index d69e77045b..78bf316c17 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -33,6 +33,7 @@ #include "interpreter/mterp/nterp.h" #include "intrinsics.h" #include "intrinsics_arm_vixl.h" +#include "intrinsics_list.h" #include "intrinsics_utils.h" #include "linker/linker_patch.h" #include "mirror/array-inl.h" @@ -40,6 +41,7 @@ #include "mirror/var_handle.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" +#include "trace.h" #include "utils/arm/assembler_arm_vixl.h" #include "utils/arm/managed_register_arm.h" #include "utils/assembler.h" @@ -1102,27 +1104,27 @@ static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) { } // Saves the register in the stack. Returns the size taken on stack. -size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARMVIXL::SaveCoreRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { TODO_VIXL32(FATAL); UNREACHABLE(); } // Restores the register from the stack. Returns the size taken on stack. -size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARMVIXL::RestoreCoreRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { TODO_VIXL32(FATAL); UNREACHABLE(); } -size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { TODO_VIXL32(FATAL); UNREACHABLE(); } -size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, - uint32_t reg_id ATTRIBUTE_UNUSED) { +size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister([[maybe_unused]] size_t stack_index, + [[maybe_unused]] uint32_t reg_id) { TODO_VIXL32(FATAL); UNREACHABLE(); } @@ -1908,6 +1910,7 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction, } namespace detail { + // Mark which intrinsics we don't have handcrafted code for. template <Intrinsics T> struct IsUnimplemented { @@ -1922,15 +1925,13 @@ struct IsUnimplemented { UNIMPLEMENTED_INTRINSIC_LIST_ARM(TRUE_OVERRIDE) #undef TRUE_OVERRIDE -#include "intrinsics_list.h" static constexpr bool kIsIntrinsicUnimplemented[] = { - false, // kNone + false, // kNone #define IS_UNIMPLEMENTED(Intrinsic, ...) \ - IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, - INTRINSICS_LIST(IS_UNIMPLEMENTED) + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) #undef IS_UNIMPLEMENTED }; -#undef INTRINSICS_LIST } // namespace detail @@ -2024,7 +2025,7 @@ void CodeGeneratorARMVIXL::FixJumpTables() { #define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT -void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { +void CodeGeneratorARMVIXL::Finalize() { FixJumpTables(); // Emit JIT baker read barrier slow paths. 
@@ -2037,11 +2038,11 @@ void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) { } GetAssembler()->FinalizeCode(); - CodeGenerator::Finalize(allocator); + CodeGenerator::Finalize(); // Verify Baker read barrier linker patches. if (kIsDebugBuild) { - ArrayRef<const uint8_t> code = allocator->GetMemory(); + ArrayRef<const uint8_t> code(GetCode()); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { DCHECK(info.label.IsBound()); uint32_t literal_offset = info.label.GetLocation(); @@ -2188,11 +2189,16 @@ void LocationsBuilderARMVIXL::VisitMethodExitHook(HMethodExitHook* method_hook) LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); locations->SetInAt(0, parameter_visitor_.GetReturnLocation(method_hook->InputAt(0)->GetType())); + // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to + // compute the address to store the timestamp counter. + locations->AddRegisterTemps(3); } void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* instruction) { - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); + LocationSummary* locations = instruction->GetLocations(); + vixl32::Register addr = RegisterFrom(locations->GetTemp(0)); + vixl32::Register value = RegisterFrom(locations->GetTemp(1)); + vixl32::Register tmp = RegisterFrom(locations->GetTemp(2)); SlowPathCodeARMVIXL* slow_path = new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathARMVIXL(instruction); @@ -2204,20 +2210,61 @@ void InstructionCodeGeneratorARMVIXL::GenerateMethodEntryExitHook(HInstruction* // if it is just non-zero. kCHA bit isn't used in debuggable runtimes as cha optimization is // disabled in debuggable runtime. The other bit is used when this method itself requires a // deoptimization due to redefinition. So it is safe to just check for non-zero value here. - GetAssembler()->LoadFromOffset(kLoadWord, - temp, - sp, - codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); - __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + GetAssembler()->LoadFromOffset( + kLoadWord, value, sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); + __ CompareAndBranchIfNonZero(value, slow_path->GetEntryLabel()); } MemberOffset offset = instruction->IsMethodExitHook() ? instrumentation::Instrumentation::HaveMethodExitListenersOffset() : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); uint32_t address = reinterpret_cast32<uint32_t>(Runtime::Current()->GetInstrumentation()); - __ Mov(temp, address + offset.Int32Value()); - __ Ldrb(temp, MemOperand(temp, 0)); - __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel()); + __ Mov(addr, address + offset.Int32Value()); + __ Ldrb(value, MemOperand(addr, 0)); + __ Cmp(value, instrumentation::Instrumentation::kFastTraceListeners); + // Check if there are any trace method entry / exit listeners. If no, continue. + __ B(lt, slow_path->GetExitLabel()); + // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners. + // If yes, just take the slow path. + __ B(gt, slow_path->GetEntryLabel()); + + // Check if there is place in the buffer to store a new entry, if no, take slow path. 
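// [Editor's illustration, not part of the patch] The fast path emitted below behaves roughly
// like the following sketch; `trace_buffer_index_`, `trace_buffer_ptr_` and
// `ReadTimestampCounter()` are only illustrative names for the Thread fields and the 64-bit
// counter read (MRRC) used by the generated code:
//
//   int32_t index = self->trace_buffer_index_ - kNumEntriesForWallClock;
//   if (index < 0) goto slow_path;                      // no room left in the buffer
//   self->trace_buffer_index_ = index;
//   uint32_t* entry = self->trace_buffer_ptr_ + index;  // 4-byte slots on 32-bit ARM
//   // Slot at kMethodOffsetInBytes: the ArtMethod* (reloaded from SP) with the trace
//   // action encoded in its low bits (0 for entry, kTraceMethodExit for exit).
//   entry[kMethodOffsetInBytes / 4] =
//       method_ptr | (is_exit ? enum_cast<uint32_t>(TraceAction::kTraceMethodExit) : 0u);
//   // Slots at kTimestampOffsetInBytes / kHighTimestampOffsetInBytes: low and high words
//   // of the 64-bit timestamp.
//   uint64_t ts = ReadTimestampCounter();
//   entry[kTimestampOffsetInBytes / 4] = static_cast<uint32_t>(ts);
//   entry[kHighTimestampOffsetInBytes / 4] = static_cast<uint32_t>(ts >> 32);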
+ uint32_t trace_buffer_index_offset = + Thread::TraceBufferIndexOffset<kArmPointerSize>().Int32Value(); + vixl32::Register index = value; + __ Ldr(index, MemOperand(tr, trace_buffer_index_offset)); + __ Subs(index, index, kNumEntriesForWallClock); + __ B(lt, slow_path->GetEntryLabel()); + + // Update the index in the `Thread`. + __ Str(index, MemOperand(tr, trace_buffer_index_offset)); + // Calculate the entry address in the buffer. + // addr = base_addr + sizeof(void*) * index + __ Ldr(addr, MemOperand(tr, Thread::TraceBufferPtrOffset<kArmPointerSize>().SizeValue())); + __ Add(addr, addr, Operand(index, LSL, TIMES_4)); + + // Record method pointer and trace action. + __ Ldr(tmp, MemOperand(sp, 0)); + // Use last two bits to encode trace method action. For MethodEntry it is 0 + // so no need to set the bits since they are 0 already. + if (instruction->IsMethodExitHook()) { + DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4)); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1); + __ Orr(tmp, tmp, Operand(enum_cast<int32_t>(TraceAction::kTraceMethodExit))); + } + __ Str(tmp, MemOperand(addr, kMethodOffsetInBytes)); + + vixl32::Register tmp1 = index; + // See Architecture Reference Manual ARMv7-A and ARMv7-R edition section B4.1.34. + __ Mrrc(/* lower 32-bit */ tmp, + /* higher 32-bit */ tmp1, + /* coproc= */ 15, + /* opc1= */ 1, + /* crm= */ 14); + static_assert(kHighTimestampOffsetInBytes == + kTimestampOffsetInBytes + static_cast<uint32_t>(kRuntimePointerSize)); + __ Strd(tmp, tmp1, MemOperand(addr, kTimestampOffsetInBytes)); __ Bind(slow_path->GetExitLabel()); } @@ -2228,7 +2275,11 @@ void InstructionCodeGeneratorARMVIXL::VisitMethodExitHook(HMethodExitHook* instr } void LocationsBuilderARMVIXL::VisitMethodEntryHook(HMethodEntryHook* method_hook) { - new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + // We need three temporary registers, two to load the timestamp counter (64-bit value) and one to + // compute the address to store the timestamp counter. + locations->AddRegisterTemps(3); } void InstructionCodeGeneratorARMVIXL::VisitMethodEntryHook(HMethodEntryHook* instruction) { @@ -2824,8 +2875,7 @@ void LocationsBuilderARMVIXL::VisitExit(HExit* exit) { exit->SetLocations(nullptr); } -void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} +void InstructionCodeGeneratorARMVIXL::VisitExit([[maybe_unused]] HExit* exit) {} void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition, vixl32::Label* true_target, @@ -3422,7 +3472,7 @@ void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARMVIXL::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitIntConstant([[maybe_unused]] HIntConstant* constant) { // Will be generated at use site. 
} @@ -3432,7 +3482,7 @@ void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitNullConstant([[maybe_unused]] HNullConstant* constant) { // Will be generated at use site. } @@ -3442,7 +3492,7 @@ void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitLongConstant([[maybe_unused]] HLongConstant* constant) { // Will be generated at use site. } @@ -3453,7 +3503,7 @@ void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) { } void InstructionCodeGeneratorARMVIXL::VisitFloatConstant( - HFloatConstant* constant ATTRIBUTE_UNUSED) { + [[maybe_unused]] HFloatConstant* constant) { // Will be generated at use site. } @@ -3464,7 +3514,7 @@ void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) { } void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant( - HDoubleConstant* constant ATTRIBUTE_UNUSED) { + [[maybe_unused]] HDoubleConstant* constant) { // Will be generated at use site. } @@ -3473,7 +3523,7 @@ void LocationsBuilderARMVIXL::VisitConstructorFence(HConstructorFence* construct } void InstructionCodeGeneratorARMVIXL::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + [[maybe_unused]] HConstructorFence* constructor_fence) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -3489,7 +3539,7 @@ void LocationsBuilderARMVIXL::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } -void InstructionCodeGeneratorARMVIXL::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) { codegen_->GenerateFrameExit(); } @@ -5617,7 +5667,7 @@ void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) } void InstructionCodeGeneratorARMVIXL::VisitParameterValue( - HParameterValue* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HParameterValue* instruction) { // Nothing to do, the parameter is already at its location. } @@ -5628,7 +5678,7 @@ void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) { } void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod( - HCurrentMethod* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HCurrentMethod* instruction) { // Nothing to do, the method is already at its location. 
} @@ -5769,7 +5819,7 @@ void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) { locations->SetOut(Location::Any()); } -void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitPhi([[maybe_unused]] HPhi* instruction) { LOG(FATAL) << "Unreachable"; } @@ -6104,8 +6154,7 @@ Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* inpu Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode) { DCHECK(!DataType::IsFloatingPointType(constant->GetType())); - if (constant->IsConstant() && - CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) { + if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) { return Location::ConstantLocation(constant); } return Location::RequiresRegister(); @@ -7234,7 +7283,7 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp, } } -void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderARMVIXL::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { LOG(FATAL) << "Unreachable"; } @@ -7604,7 +7653,9 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { locations->SetInAt(0, Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister()); - if (load_kind == HLoadClass::LoadKind::kBssEntry) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage) { if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); @@ -7631,9 +7682,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ Location out_loc = locations->Out(); vixl32::Register out = OutputRegister(cls); - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : gCompilerReadBarrierOption; + const ReadBarrierOption read_barrier_option = + cls->IsInBootImage() ? kWithoutReadBarrier : GetCompilerReadBarrierOption(); bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -7887,7 +7937,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE codegen_->EmitMovwMovtPlaceholder(labels, out); // All aligned loads are implicitly atomic consume operations on ARM. codegen_->GenerateGcRootFieldLoad( - load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption); + load, out_loc, out, /*offset=*/0, GetCompilerReadBarrierOption()); LoadStringSlowPathARMVIXL* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load); codegen_->AddSlowPath(slow_path); @@ -7908,14 +7958,13 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE load->GetString())); // /* GcRoot<mirror::String> */ out = *out codegen_->GenerateGcRootFieldLoad( - load, out_loc, out, /*offset=*/ 0, gCompilerReadBarrierOption); + load, out_loc, out, /*offset=*/0, GetCompilerReadBarrierOption()); return; } default: break; } - // TODO: Re-add the compiler code to do string dex cache lookup again. 
DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall); InvokeRuntimeCallingConventionARMVIXL calling_convention; __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); @@ -7944,7 +7993,7 @@ void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) { new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } -void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitClearException([[maybe_unused]] HClearException* clear) { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); __ Mov(temp, 0); @@ -8490,12 +8539,11 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - maybe_temp2_loc, - kWithoutReadBarrier); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ Ldr(RegisterFrom(maybe_temp2_loc), MemOperand(temp, array_length_offset)); // Loop through the iftable and check if any class matches. @@ -9828,7 +9876,7 @@ void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch, assembler.FinalizeCode(); code->resize(assembler.CodeSize()); MemoryRegion code_region(code->data(), code->size()); - assembler.FinalizeInstructions(code_region); + assembler.CopyInstructions(code_region); } VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal( @@ -9867,12 +9915,12 @@ void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulat } } -void LocationsBuilderARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorARMVIXL::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index f5abe6951a..0175448fde 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -620,7 +620,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { block_labels_.resize(GetGraph()->GetBlocks().size()); } - void Finalize(CodeAllocator* allocator) override; + void Finalize() override; bool NeedsTwoRegisters(DataType::Type type) const override { return type == DataType::Type::kFloat64 || type == DataType::Type::kInt64; diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc new file mode 100644 index 0000000000..7f23730143 --- /dev/null +++ b/compiler/optimizing/code_generator_riscv64.cc @@ -0,0 +1,6494 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_riscv64.h" + +#include "android-base/logging.h" +#include "android-base/macros.h" +#include "arch/riscv64/jni_frame_riscv64.h" +#include "arch/riscv64/registers_riscv64.h" +#include "base/arena_containers.h" +#include "base/macros.h" +#include "code_generator_utils.h" +#include "dwarf/register.h" +#include "heap_poisoning.h" +#include "intrinsics_list.h" +#include "intrinsics_riscv64.h" +#include "jit/profiling_info.h" +#include "linker/linker_patch.h" +#include "mirror/class-inl.h" +#include "optimizing/nodes.h" +#include "stack_map_stream.h" +#include "utils/label.h" +#include "utils/riscv64/assembler_riscv64.h" +#include "utils/stack_checks.h" + +namespace art HIDDEN { +namespace riscv64 { + +// Placeholder values embedded in instructions, patched at link time. +constexpr uint32_t kLinkTimeOffsetPlaceholderHigh = 0x12345; +constexpr uint32_t kLinkTimeOffsetPlaceholderLow = 0x678; + +// Compare-and-jump packed switch generates approx. 3 + 1.5 * N 32-bit +// instructions for N cases. +// Table-based packed switch generates approx. 10 32-bit instructions +// and N 32-bit data words for N cases. +// We switch to the table-based method starting with 6 entries. +static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 6; + +// FCLASS returns a 10-bit classification mask with the two highest bits marking NaNs +// (signaling and quiet). To detect a NaN, we can compare (either BGE or BGEU, the sign +// bit is always clear) the result with the `kFClassNaNMinValue`. +static_assert(kSignalingNaN == 0x100); +static_assert(kQuietNaN == 0x200); +static constexpr int32_t kFClassNaNMinValue = 0x100; + +static constexpr XRegister kCoreCalleeSaves[] = { + // S1(TR) is excluded as the ART thread register. + S0, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, RA +}; + +static constexpr FRegister kFpuCalleeSaves[] = { + FS0, FS1, FS2, FS3, FS4, FS5, FS6, FS7, FS8, FS9, FS10, FS11 +}; + +#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kRiscv64PointerSize, x).Int32Value() + +Location RegisterOrZeroBitPatternLocation(HInstruction* instruction) { + return IsZeroBitPattern(instruction) + ? 
Location::ConstantLocation(instruction) + : Location::RequiresRegister(); +} + +XRegister InputXRegisterOrZero(Location location) { + if (location.IsConstant()) { + DCHECK(location.GetConstant()->IsZeroBitPattern()); + return Zero; + } else { + return location.AsRegister<XRegister>(); + } +} + +Location ValueLocationForStore(HInstruction* value) { + if (IsZeroBitPattern(value)) { + return Location::ConstantLocation(value); + } else if (DataType::IsFloatingPointType(value->GetType())) { + return Location::RequiresFpuRegister(); + } else { + return Location::RequiresRegister(); + } +} + +Location Riscv64ReturnLocation(DataType::Type return_type) { + switch (return_type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kUint32: + case DataType::Type::kInt32: + case DataType::Type::kReference: + case DataType::Type::kUint64: + case DataType::Type::kInt64: + return Location::RegisterLocation(A0); + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + return Location::FpuRegisterLocation(FA0); + + case DataType::Type::kVoid: + return Location::NoLocation(); + } + UNREACHABLE(); +} + +static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + DCHECK_EQ( + calling_convention.GetRegisterAt(0), + calling_convention.GetReturnLocation(DataType::Type::kReference).AsRegister<XRegister>()); + return caller_saves; +} + +template <ClassStatus kStatus> +static constexpr int64_t ShiftedSignExtendedClassStatusValue() { + // This is used only for status values that have the highest bit set. + static_assert(CLZ(enum_cast<uint32_t>(kStatus)) == status_lsb_position); + constexpr uint32_t kShiftedStatusValue = enum_cast<uint32_t>(kStatus) << status_lsb_position; + static_assert(kShiftedStatusValue >= 0x80000000u); + return static_cast<int64_t>(kShiftedStatusValue) - (INT64_C(1) << 32); +} + +int32_t ReadBarrierMarkEntrypointOffset(Location ref) { + DCHECK(ref.IsRegister()); + int reg = ref.reg(); + DCHECK(T0 <= reg && reg <= T6 && reg != TR) << reg; + // Note: Entrypoints for registers X30 (T5) and X31 (T6) are stored in entries + // for X0 (Zero) and X1 (RA) because these are not valid registers for marking + // and we currently have slots only up to register 29. + int entry_point_number = (reg >= 30) ? reg - 30 : reg; + return Thread::ReadBarrierMarkEntryPointsOffset<kRiscv64PointerSize>(entry_point_number); +} + +Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) { + return Riscv64ReturnLocation(return_type); +} + +Location InvokeDexCallingConventionVisitorRISCV64::GetReturnLocation(DataType::Type type) const { + return Riscv64ReturnLocation(type); +} + +Location InvokeDexCallingConventionVisitorRISCV64::GetMethodLocation() const { + return Location::RegisterLocation(kArtMethodRegister); +} + +Location InvokeDexCallingConventionVisitorRISCV64::GetNextLocation(DataType::Type type) { + Location next_location; + if (type == DataType::Type::kVoid) { + LOG(FATAL) << "Unexpected parameter type " << type; + } + + // Note: Unlike the RISC-V C/C++ calling convention, managed ABI does not use + // GPRs to pass FP args when we run out of FPRs. 
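// [Editor's example, not part of the patch] Concretely: for a managed method taking nine
// float parameters, the first eight land in FA0-FA7 and the ninth goes to a stack slot even
// though integer argument registers may still be free, whereas the RISC-V C calling
// convention would pass that ninth float in a GPR. The chain below therefore never assigns
// a floating-point argument to a core register.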
+ if (DataType::IsFloatingPointType(type) && + float_index_ < calling_convention.GetNumberOfFpuRegisters()) { + next_location = + Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(float_index_++)); + } else if (!DataType::IsFloatingPointType(type) && + (gp_index_ < calling_convention.GetNumberOfRegisters())) { + next_location = Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index_++)); + } else { + size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_); + next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) : + Location::StackSlot(stack_offset); + } + + // Space on the stack is reserved for all arguments. + stack_index_ += DataType::Is64BitType(type) ? 2 : 1; + + return next_location; +} + +Location CriticalNativeCallingConventionVisitorRiscv64::GetNextLocation(DataType::Type type) { + DCHECK_NE(type, DataType::Type::kReference); + + Location location = Location::NoLocation(); + if (DataType::IsFloatingPointType(type)) { + if (fpr_index_ < kParameterFpuRegistersLength) { + location = Location::FpuRegisterLocation(kParameterFpuRegisters[fpr_index_]); + ++fpr_index_; + } + // Native ABI allows passing excessive FP args in GPRs. This is facilitated by + // inserting fake conversion intrinsic calls (`Double.doubleToRawLongBits()` + // or `Float.floatToRawIntBits()`) by `CriticalNativeAbiFixupRiscv64`. + // TODO(riscv64): Implement these intrinsics and `CriticalNativeAbiFixupRiscv64`. + } else { + // Native ABI uses the same core registers as a runtime call. + if (gpr_index_ < kRuntimeParameterCoreRegistersLength) { + location = Location::RegisterLocation(kRuntimeParameterCoreRegisters[gpr_index_]); + ++gpr_index_; + } + } + if (location.IsInvalid()) { + if (DataType::Is64BitType(type)) { + location = Location::DoubleStackSlot(stack_offset_); + } else { + location = Location::StackSlot(stack_offset_); + } + stack_offset_ += kFramePointerSize; + + if (for_register_allocation_) { + location = Location::Any(); + } + } + return location; +} + +Location CriticalNativeCallingConventionVisitorRiscv64::GetReturnLocation( + DataType::Type type) const { + // The result is returned the same way in native ABI and managed ABI. No result conversion is + // needed, see comments in `Riscv64JniCallingConvention::RequiresSmallResultTypeExtension()`. + InvokeDexCallingConventionVisitorRISCV64 dex_calling_convention; + return dex_calling_convention.GetReturnLocation(type); +} + +Location CriticalNativeCallingConventionVisitorRiscv64::GetMethodLocation() const { + // Pass the method in the hidden argument T0. + return Location::RegisterLocation(T0); +} + +#define __ down_cast<CodeGeneratorRISCV64*>(codegen)->GetAssembler()-> // NOLINT + +void LocationsBuilderRISCV64::HandleInvoke(HInvoke* instruction) { + InvokeDexCallingConventionVisitorRISCV64 calling_convention_visitor; + CodeGenerator::CreateCommonInvokeLocationSummary(instruction, &calling_convention_visitor); +} + +class CompileOptimizedSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + CompileOptimizedSlowPathRISCV64() : SlowPathCodeRISCV64(/*instruction=*/ nullptr) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + uint32_t entrypoint_offset = + GetThreadOffset<kRiscv64PointerSize>(kQuickCompileOptimized).Int32Value(); + __ Bind(GetEntryLabel()); + __ Loadd(RA, TR, entrypoint_offset); + // Note: we don't record the call here (and therefore don't generate a stack + // map), as the entrypoint should never be suspended. 
+ __ Jalr(RA); + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "CompileOptimizedSlowPath"; } + + private: + DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathRISCV64); +}; + +class SuspendCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + SuspendCheckSlowPathRISCV64(HSuspendCheck* instruction, HBasicBlock* successor) + : SlowPathCodeRISCV64(instruction), successor_(successor) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); // Only saves live vector registers for SIMD. + riscv64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickTestSuspend, void, void>(); + RestoreLiveRegisters(codegen, locations); // Only restores live vector registers for SIMD. + if (successor_ == nullptr) { + __ J(GetReturnLabel()); + } else { + __ J(riscv64_codegen->GetLabelOf(successor_)); + } + } + + Riscv64Label* GetReturnLabel() { + DCHECK(successor_ == nullptr); + return &return_label_; + } + + const char* GetDescription() const override { return "SuspendCheckSlowPathRISCV64"; } + + HBasicBlock* GetSuccessor() const { return successor_; } + + private: + // If not null, the block to branch to after the suspend check. + HBasicBlock* const successor_; + + // If `successor_` is null, the label to branch to after the suspend check. + Riscv64Label return_label_; + + DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathRISCV64); +}; + +class NullCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit NullCheckSlowPathRISCV64(HNullCheck* instr) : SlowPathCodeRISCV64(instr) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + if (instruction_->CanThrowIntoCatchBlock()) { + // Live registers will be restored in the catch block if caught. + SaveLiveRegisters(codegen, instruction_->GetLocations()); + } + riscv64_codegen->InvokeRuntime( + kQuickThrowNullPointer, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowNullPointer, void, void>(); + } + + bool IsFatal() const override { return true; } + + const char* GetDescription() const override { return "NullCheckSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathRISCV64); +}; + +class BoundsCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit BoundsCheckSlowPathRISCV64(HBoundsCheck* instruction) + : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + if (instruction_->CanThrowIntoCatchBlock()) { + // Live registers will be restored in the catch block if caught. + SaveLiveRegisters(codegen, instruction_->GetLocations()); + } + // We're moving two locations to locations that could overlap, so we need a parallel + // move resolver. 
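// [Editor's note, not part of the patch] EmitParallelMoves is needed because the two inputs
// may already occupy each other's destinations. For example, if the index value currently
// lives in A1 and the array length in A0, copying them one at a time into A0/A1 would
// overwrite one of them; the parallel move resolver detects the cycle and routes one value
// through a temporary first.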
+ InvokeRuntimeCallingConvention calling_convention; + codegen->EmitParallelMoves(locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + DataType::Type::kInt32, + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + DataType::Type::kInt32); + QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ? + kQuickThrowStringBounds : + kQuickThrowArrayBounds; + riscv64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>(); + CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>(); + } + + bool IsFatal() const override { return true; } + + const char* GetDescription() const override { return "BoundsCheckSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathRISCV64); +}; + +class LoadClassSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + LoadClassSlowPathRISCV64(HLoadClass* cls, HInstruction* at) : SlowPathCodeRISCV64(at), cls_(cls) { + DCHECK(at->IsLoadClass() || at->IsClinitCheck()); + DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); + } + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + Location out = locations->Out(); + const uint32_t dex_pc = instruction_->GetDexPc(); + bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); + bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck(); + + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + if (must_resolve_type) { + DCHECK(IsSameDexFile(cls_->GetDexFile(), riscv64_codegen->GetGraph()->GetDexFile()) || + riscv64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) || + ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(), + &cls_->GetDexFile())); + dex::TypeIndex type_index = cls_->GetTypeIndex(); + __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); + if (cls_->NeedsAccessCheck()) { + CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>(); + riscv64_codegen->InvokeRuntime( + kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this); + } else { + CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>(); + riscv64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); + } + // If we also must_do_clinit, the resolved type is now in the correct register. + } else { + DCHECK(must_do_clinit); + Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); + riscv64_codegen->MoveLocation( + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source, cls_->GetType()); + } + if (must_do_clinit) { + riscv64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>(); + } + + // Move the class to the desired location. 
+ if (out.IsValid()) { + DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); + DataType::Type type = DataType::Type::kReference; + DCHECK_EQ(type, instruction_->GetType()); + riscv64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); + } + RestoreLiveRegisters(codegen, locations); + + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "LoadClassSlowPathRISCV64"; } + + private: + // The class this slow path will load. + HLoadClass* const cls_; + + DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathRISCV64); +}; + +class DeoptimizationSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit DeoptimizationSlowPathRISCV64(HDeoptimize* instruction) + : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + LocationSummary* locations = instruction_->GetLocations(); + SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), + static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind())); + riscv64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>(); + } + + const char* GetDescription() const override { return "DeoptimizationSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathRISCV64); +}; + +// Slow path generating a read barrier for a GC root. +class ReadBarrierForRootSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + ReadBarrierForRootSlowPathRISCV64(HInstruction* instruction, Location out, Location root) + : SlowPathCodeRISCV64(instruction), out_(out), root_(root) { + DCHECK(gUseReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + DataType::Type type = DataType::Type::kReference; + XRegister reg_out = out_.AsRegister<XRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || + instruction_->IsLoadString() || + (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + riscv64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + root_, + DataType::Type::kReference); + riscv64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + riscv64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "ReadBarrierForRootSlowPathRISCV64"; } + + private: + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathRISCV64); +}; + +class ArraySetSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit 
ArraySetSlowPathRISCV64(HInstruction* instruction) : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); + parallel_move.AddMove( + locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + DataType::Type::kReference, + nullptr); + parallel_move.AddMove( + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + DataType::Type::kInt32, + nullptr); + parallel_move.AddMove( + locations->InAt(2), + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + DataType::Type::kReference, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + riscv64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); + RestoreLiveRegisters(codegen, locations); + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "ArraySetSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathRISCV64); +}; + +class TypeCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit TypeCheckSlowPathRISCV64(HInstruction* instruction, bool is_fatal) + : SlowPathCodeRISCV64(instruction), is_fatal_(is_fatal) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + + uint32_t dex_pc = instruction_->GetDexPc(); + DCHECK(instruction_->IsCheckCast() + || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + + __ Bind(GetEntryLabel()); + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { + SaveLiveRegisters(codegen, locations); + } + + // We're moving two locations to locations that could overlap, so we need a parallel + // move resolver.
+ InvokeRuntimeCallingConvention calling_convention; + codegen->EmitParallelMoves(locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + DataType::Type::kReference, + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + DataType::Type::kReference); + if (instruction_->IsInstanceOf()) { + riscv64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>(); + DataType::Type ret_type = instruction_->GetType(); + Location ret_loc = calling_convention.GetReturnLocation(ret_type); + riscv64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); + } else { + DCHECK(instruction_->IsCheckCast()); + riscv64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this); + CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>(); + } + + if (!is_fatal_) { + RestoreLiveRegisters(codegen, locations); + __ J(GetExitLabel()); + } + } + + const char* GetDescription() const override { return "TypeCheckSlowPathRISCV64"; } + + bool IsFatal() const override { return is_fatal_; } + + private: + const bool is_fatal_; + + DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathRISCV64); +}; + +class DivZeroCheckSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit DivZeroCheckSlowPathRISCV64(HDivZeroCheck* instruction) + : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + __ Bind(GetEntryLabel()); + riscv64_codegen->InvokeRuntime( + kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickThrowDivZero, void, void>(); + } + + bool IsFatal() const override { return true; } + + const char* GetDescription() const override { return "DivZeroCheckSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathRISCV64); +}; + +class ReadBarrierMarkSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + ReadBarrierMarkSlowPathRISCV64(HInstruction* instruction, Location ref, Location entrypoint) + : SlowPathCodeRISCV64(instruction), ref_(ref), entrypoint_(entrypoint) { + DCHECK(gUseReadBarrier); + DCHECK(entrypoint.IsRegister()); + } + + const char* GetDescription() const override { return "ReadBarrierMarkSlowPathRISCV64"; } + + void EmitNativeCode(CodeGenerator* codegen) override { + LocationSummary* locations = instruction_->GetLocations(); + XRegister ref_reg = ref_.AsRegister<XRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsPredicatedInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsArraySet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. 
+ CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + DCHECK(ref_reg >= T0 && ref_reg != TR); + + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in A0 and V0 respectively): + // + // A0 <- ref + // V0 <- ReadBarrierMark(A0) + // ref <- V0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + riscv64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + DCHECK_NE(entrypoint_.AsRegister<XRegister>(), TMP); // A taken branch can clobber `TMP`. + __ Jalr(entrypoint_.AsRegister<XRegister>()); // Clobbers `RA` (used as the `entrypoint_`). + __ J(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + + // The location of the already loaded entrypoint. + const Location entrypoint_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathRISCV64); +}; + +class LoadStringSlowPathRISCV64 : public SlowPathCodeRISCV64 { + public: + explicit LoadStringSlowPathRISCV64(HLoadString* instruction) + : SlowPathCodeRISCV64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) override { + DCHECK(instruction_->IsLoadString()); + DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); + CodeGeneratorRISCV64* riscv64_codegen = down_cast<CodeGeneratorRISCV64*>(codegen); + InvokeRuntimeCallingConvention calling_convention; + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_); + riscv64_codegen->InvokeRuntime( + kQuickResolveString, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + + DataType::Type type = DataType::Type::kReference; + DCHECK_EQ(type, instruction_->GetType()); + riscv64_codegen->MoveLocation( + locations->Out(), calling_convention.GetReturnLocation(type), type); + RestoreLiveRegisters(codegen, locations); + + __ J(GetExitLabel()); + } + + const char* GetDescription() const override { return "LoadStringSlowPathRISCV64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathRISCV64); +}; + +#undef __ +#define __ down_cast<Riscv64Assembler*>(GetAssembler())-> // NOLINT + +template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister, FRegister)> +inline void InstructionCodeGeneratorRISCV64::FpBinOp( + Reg rd, FRegister rs1, FRegister rs2, DataType::Type type) { + Riscv64Assembler* assembler = down_cast<CodeGeneratorRISCV64*>(codegen_)->GetAssembler(); + if (type == DataType::Type::kFloat32) { + (assembler->*opS)(rd, rs1, rs2); + } else { + DCHECK_EQ(type, DataType::Type::kFloat64); + (assembler->*opD)(rd, rs1, rs2); + } +} + +inline void InstructionCodeGeneratorRISCV64::FAdd( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FAddS, &Riscv64Assembler::FAddD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FSub( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, 
&Riscv64Assembler::FSubS, &Riscv64Assembler::FSubD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FDiv( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FDivS, &Riscv64Assembler::FDivD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMul( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FMulS, &Riscv64Assembler::FMulD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMin( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FMinS, &Riscv64Assembler::FMinD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMax( + FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<FRegister, &Riscv64Assembler::FMaxS, &Riscv64Assembler::FMaxD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FEq( + XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<XRegister, &Riscv64Assembler::FEqS, &Riscv64Assembler::FEqD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FLt( + XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<XRegister, &Riscv64Assembler::FLtS, &Riscv64Assembler::FLtD>(rd, rs1, rs2, type); +} + +inline void InstructionCodeGeneratorRISCV64::FLe( + XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) { + FpBinOp<XRegister, &Riscv64Assembler::FLeS, &Riscv64Assembler::FLeD>(rd, rs1, rs2, type); +} + +template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister)> +inline void InstructionCodeGeneratorRISCV64::FpUnOp( + Reg rd, FRegister rs1, DataType::Type type) { + Riscv64Assembler* assembler = down_cast<CodeGeneratorRISCV64*>(codegen_)->GetAssembler(); + if (type == DataType::Type::kFloat32) { + (assembler->*opS)(rd, rs1); + } else { + DCHECK_EQ(type, DataType::Type::kFloat64); + (assembler->*opD)(rd, rs1); + } +} + +inline void InstructionCodeGeneratorRISCV64::FAbs( + FRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<FRegister, &Riscv64Assembler::FAbsS, &Riscv64Assembler::FAbsD>(rd, rs1, type); +} + +inline void InstructionCodeGeneratorRISCV64::FNeg( + FRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<FRegister, &Riscv64Assembler::FNegS, &Riscv64Assembler::FNegD>(rd, rs1, type); +} + +inline void InstructionCodeGeneratorRISCV64::FMv( + FRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<FRegister, &Riscv64Assembler::FMvS, &Riscv64Assembler::FMvD>(rd, rs1, type); +} + +inline void InstructionCodeGeneratorRISCV64::FClass( + XRegister rd, FRegister rs1, DataType::Type type) { + FpUnOp<XRegister, &Riscv64Assembler::FClassS, &Riscv64Assembler::FClassD>(rd, rs1, type); +} + +void InstructionCodeGeneratorRISCV64::Load( + Location out, XRegister rs1, int32_t offset, DataType::Type type) { + switch (type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + __ Loadbu(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kInt8: + __ Loadb(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kUint16: + __ Loadhu(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kInt16: + __ Loadh(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kInt32: + __ Loadw(out.AsRegister<XRegister>(), rs1, offset); + break; + case 
DataType::Type::kInt64: + __ Loadd(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kReference: + __ Loadwu(out.AsRegister<XRegister>(), rs1, offset); + break; + case DataType::Type::kFloat32: + __ FLoadw(out.AsFpuRegister<FRegister>(), rs1, offset); + break; + case DataType::Type::kFloat64: + __ FLoadd(out.AsFpuRegister<FRegister>(), rs1, offset); + break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::Store( + Location value, XRegister rs1, int32_t offset, DataType::Type type) { + DCHECK_IMPLIES(value.IsConstant(), IsZeroBitPattern(value.GetConstant())); + if (kPoisonHeapReferences && type == DataType::Type::kReference && !value.IsConstant()) { + riscv64::ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Mv(tmp, value.AsRegister<XRegister>()); + codegen_->PoisonHeapReference(tmp); + __ Storew(tmp, rs1, offset); + return; + } + switch (type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + __ Storeb(InputXRegisterOrZero(value), rs1, offset); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + __ Storeh(InputXRegisterOrZero(value), rs1, offset); + break; + case DataType::Type::kFloat32: + if (!value.IsConstant()) { + __ FStorew(value.AsFpuRegister<FRegister>(), rs1, offset); + break; + } + FALLTHROUGH_INTENDED; + case DataType::Type::kInt32: + case DataType::Type::kReference: + __ Storew(InputXRegisterOrZero(value), rs1, offset); + break; + case DataType::Type::kFloat64: + if (!value.IsConstant()) { + __ FStored(value.AsFpuRegister<FRegister>(), rs1, offset); + break; + } + FALLTHROUGH_INTENDED; + case DataType::Type::kInt64: + __ Stored(InputXRegisterOrZero(value), rs1, offset); + break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::ShNAdd( + XRegister rd, XRegister rs1, XRegister rs2, DataType::Type type) { + switch (type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(DataType::SizeShift(type), 0u); + __ Add(rd, rs1, rs2); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(DataType::SizeShift(type), 1u); + __ Sh1Add(rd, rs1, rs2); + break; + case DataType::Type::kInt32: + case DataType::Type::kReference: + case DataType::Type::kFloat32: + DCHECK_EQ(DataType::SizeShift(type), 2u); + __ Sh2Add(rd, rs1, rs2); + break; + case DataType::Type::kInt64: + case DataType::Type::kFloat64: + DCHECK_EQ(DataType::SizeShift(type), 3u); + __ Sh3Add(rd, rs1, rs2); + break; + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +Riscv64Assembler* ParallelMoveResolverRISCV64::GetAssembler() const { + return codegen_->GetAssembler(); +} + +void ParallelMoveResolverRISCV64::EmitMove(size_t index) { + MoveOperands* move = moves_[index]; + codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType()); +} + +void ParallelMoveResolverRISCV64::EmitSwap(size_t index) { + MoveOperands* move = moves_[index]; + codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType()); +} + +void 
ParallelMoveResolverRISCV64::SpillScratch([[maybe_unused]] int reg) { + LOG(FATAL) << "Unimplemented"; + UNREACHABLE(); +} + +void ParallelMoveResolverRISCV64::RestoreScratch([[maybe_unused]] int reg) { + LOG(FATAL) << "Unimplemented"; + UNREACHABLE(); +} + +void ParallelMoveResolverRISCV64::Exchange(int index1, int index2, bool double_slot) { + // We have 2 scratch X registers and 1 scratch F register that we can use. We prefer + // to use X registers for the swap but if both offsets are too big, we need to reserve + // one of the X registers for address adjustment and use an F register. + bool use_fp_tmp2 = false; + if (!IsInt<12>(index2)) { + if (!IsInt<12>(index1)) { + use_fp_tmp2 = true; + } else { + std::swap(index1, index2); + } + } + DCHECK_IMPLIES(!IsInt<12>(index2), use_fp_tmp2); + + Location loc1(double_slot ? Location::DoubleStackSlot(index1) : Location::StackSlot(index1)); + Location loc2(double_slot ? Location::DoubleStackSlot(index2) : Location::StackSlot(index2)); + riscv64::ScratchRegisterScope srs(GetAssembler()); + Location tmp = Location::RegisterLocation(srs.AllocateXRegister()); + DataType::Type tmp_type = double_slot ? DataType::Type::kInt64 : DataType::Type::kInt32; + Location tmp2 = use_fp_tmp2 + ? Location::FpuRegisterLocation(srs.AllocateFRegister()) + : Location::RegisterLocation(srs.AllocateXRegister()); + DataType::Type tmp2_type = use_fp_tmp2 + ? (double_slot ? DataType::Type::kFloat64 : DataType::Type::kFloat32) + : tmp_type; + + codegen_->MoveLocation(tmp, loc1, tmp_type); + codegen_->MoveLocation(tmp2, loc2, tmp2_type); + if (use_fp_tmp2) { + codegen_->MoveLocation(loc2, tmp, tmp_type); + } else { + // We cannot use `Stored()` or `Storew()` via `MoveLocation()` because we have + // no more scratch registers available. Use `Sd()` or `Sw()` explicitly. + DCHECK(IsInt<12>(index2)); + if (double_slot) { + __ Sd(tmp.AsRegister<XRegister>(), SP, index2); + } else { + __ Sw(tmp.AsRegister<XRegister>(), SP, index2); + } + srs.FreeXRegister(tmp.AsRegister<XRegister>()); // Free a temporary for `MoveLocation()`. + } + codegen_->MoveLocation(loc1, tmp2, tmp2_type); +} + +InstructionCodeGeneratorRISCV64::InstructionCodeGeneratorRISCV64(HGraph* graph, + CodeGeneratorRISCV64* codegen) + : InstructionCodeGenerator(graph, codegen), + assembler_(codegen->GetAssembler()), + codegen_(codegen) {} + +void InstructionCodeGeneratorRISCV64::GenerateClassInitializationCheck( + SlowPathCodeRISCV64* slow_path, XRegister class_reg) { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + + // We shall load the full 32-bit status word with sign-extension and compare as unsigned + // to a sign-extended shifted status value. This yields the same comparison as loading and + // materializing unsigned but the constant is materialized with a single LUI instruction. + __ Loadw(tmp, class_reg, mirror::Class::StatusOffset().SizeValue()); // Sign-extended. 
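+ // The class status occupies the most significant bits of the status word, so any status at
+ // or above kVisiblyInitialized compares unsigned-greater-or-equal to the shifted constant.
+ // That shifted constant has its low 12 bits clear, which is why the LI below can expand to a
+ // single LUI.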
+ __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kVisiblyInitialized>()); + __ Bltu(tmp, tmp2, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorRISCV64::GenerateBitstringTypeCheckCompare( + HTypeCheckInstruction* instruction, XRegister temp) { + UNUSED(instruction); + UNUSED(temp); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::GenerateSuspendCheck(HSuspendCheck* instruction, + HBasicBlock* successor) { + if (instruction->IsNoOp()) { + if (successor != nullptr) { + __ J(codegen_->GetLabelOf(successor)); + } + return; + } + + if (codegen_->CanUseImplicitSuspendCheck()) { + LOG(FATAL) << "Unimplemented ImplicitSuspendCheck"; + return; + } + + SuspendCheckSlowPathRISCV64* slow_path = + down_cast<SuspendCheckSlowPathRISCV64*>(instruction->GetSlowPath()); + + if (slow_path == nullptr) { + slow_path = + new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathRISCV64(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } + + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, TR, Thread::ThreadFlagsOffset<kRiscv64PointerSize>().Int32Value()); + static_assert(Thread::SuspendOrCheckpointRequestFlags() != std::numeric_limits<uint32_t>::max()); + static_assert(IsPowerOfTwo(Thread::SuspendOrCheckpointRequestFlags() + 1u)); + // Shift out other bits. Use an instruction that can be 16-bit with the "C" Standard Extension. + __ Slli(tmp, tmp, CLZ(static_cast<uint64_t>(Thread::SuspendOrCheckpointRequestFlags()))); + if (successor == nullptr) { + __ Bnez(tmp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetReturnLabel()); + } else { + __ Beqz(tmp, codegen_->GetLabelOf(successor)); + __ J(slow_path->GetEntryLabel()); + // slow_path will return to GetLabelOf(successor). + } +} + +void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadOneRegister( + HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option) { + XRegister out_reg = out.AsRegister<XRegister>(); + if (read_barrier_option == kWithReadBarrier) { + CHECK(gUseReadBarrier); + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + out_reg, + offset, + maybe_temp, + /* needs_null_check= */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `maybe_temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. + __ Mv(maybe_temp.AsRegister<XRegister>(), out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ Loadwu(out_reg, out_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); + } + } else { + // Plain load with no read barrier. 
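+ // (With heap poisoning enabled the stored reference is poisoned, hence the unpoisoning
+ // right after the load below.)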
+ // /* HeapReference<Object> */ out = *(out + offset) + __ Loadwu(out_reg, out_reg, offset); + codegen_->MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorRISCV64::GenerateReferenceLoadTwoRegisters( + HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option) { + XRegister out_reg = out.AsRegister<XRegister>(); + XRegister obj_reg = obj.AsRegister<XRegister>(); + if (read_barrier_option == kWithReadBarrier) { + CHECK(gUseReadBarrier); + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + obj_reg, + offset, + maybe_temp, + /* needs_null_check= */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ Loadwu(out_reg, obj_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ Loadwu(out_reg, obj_reg, offset); + codegen_->MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorRISCV64::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + XRegister obj, + uint32_t offset, + ReadBarrierOption read_barrier_option, + Riscv64Label* label_low) { + DCHECK_IMPLIES(label_low != nullptr, offset == kLinkTimeOffsetPlaceholderLow) << offset; + XRegister root_reg = root.AsRegister<XRegister>(); + if (read_barrier_option == kWithReadBarrier) { + DCHECK(gUseReadBarrier); + if (kUseBakerReadBarrier) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T6) the read barrier mark entry point corresponding + // to register `root`. If `temp` is null, it means that `GetIsGcMarking()` + // is false, and vice versa. + // + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) + // } + // + // TODO(riscv64): Introduce a "marking register" that holds the pointer to one of the + // register marking entrypoints if marking (null if not marking) and make sure that + // marking entrypoints for other registers are at known offsets, so that we can call + // them using the "marking register" plus the offset embedded in the JALR instruction. + + if (label_low != nullptr) { + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ Loadwu(root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path marking the GC root `root`. + XRegister tmp = RA; // Use RA as temp. It is clobbered in the slow path anyway. 
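+ // `tmp` doubles as the slow path's `entrypoint_`: a non-null value loaded below both
+ // signals that `GetIsGcMarking()` is true and provides the address for the slow path's JALR.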
+ SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64( + instruction, root, Location::RegisterLocation(tmp)); + codegen_->AddSlowPath(slow_path); + + const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(root); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ Loadd(tmp, TR, entry_point_offset); + __ Bnez(tmp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + if (label_low != nullptr) { + __ Bind(label_low); + } + __ AddConst32(root_reg, obj, offset); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + if (label_low != nullptr) { + __ Bind(label_low); + } + __ Loadwu(root_reg, obj, offset); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. + } +} + +void InstructionCodeGeneratorRISCV64::GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, + Riscv64Label* true_target, + Riscv64Label* false_target) { + HInstruction* cond = instruction->InputAt(condition_input_index); + + if (true_target == nullptr && false_target == nullptr) { + // Nothing to do. The code always falls through. + return; + } else if (cond->IsIntConstant()) { + // Constant condition, statically compared against "true" (integer value 1). + if (cond->AsIntConstant()->IsTrue()) { + if (true_target != nullptr) { + __ J(true_target); + } + } else { + DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); + if (false_target != nullptr) { + __ J(false_target); + } + } + return; + } + + // The following code generates these patterns: + // (1) true_target == nullptr && false_target != nullptr + // - opposite condition true => branch to false_target + // (2) true_target != nullptr && false_target == nullptr + // - condition true => branch to true_target + // (3) true_target != nullptr && false_target != nullptr + // - condition true => branch to true_target + // - branch to false_target + if (IsBooleanValueOrMaterializedCondition(cond)) { + // The condition instruction has been materialized, compare the output to 0. + Location cond_val = instruction->GetLocations()->InAt(condition_input_index); + DCHECK(cond_val.IsRegister()); + if (true_target == nullptr) { + __ Beqz(cond_val.AsRegister<XRegister>(), false_target); + } else { + __ Bnez(cond_val.AsRegister<XRegister>(), true_target); + } + } else { + // The condition instruction has not been materialized, use its inputs as + // the comparison and its condition as the branch condition. 
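+ // For example, a non-materialized integer `a < b` with both operands in registers is
+ // emitted as a single BLT by GenerateIntLongCompareAndBranch() below.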
+ HCondition* condition = cond->AsCondition(); + DataType::Type type = condition->InputAt(0)->GetType(); + LocationSummary* locations = condition->GetLocations(); + IfCondition if_cond = condition->GetCondition(); + Riscv64Label* branch_target = true_target; + + if (true_target == nullptr) { + if_cond = condition->GetOppositeCondition(); + branch_target = false_target; + } + + switch (type) { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateFpCondition(if_cond, condition->IsGtBias(), type, locations, branch_target); + break; + default: + // Integral types and reference equality. + GenerateIntLongCompareAndBranch(if_cond, locations, branch_target); + break; + } + } + + // If neither branch falls through (case 3), the conditional branch to `true_target` + // was already emitted (case 2) and we need to emit a jump to `false_target`. + if (true_target != nullptr && false_target != nullptr) { + __ J(false_target); + } +} + +void InstructionCodeGeneratorRISCV64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DataType::Type type = instruction->GetResultType(); + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + XRegister out = locations->Out().AsRegister<XRegister>(); + XRegister dividend = locations->InAt(0).AsRegister<XRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + DCHECK(imm == 1 || imm == -1); + + if (instruction->IsRem()) { + __ Mv(out, Zero); + } else { + if (imm == -1) { + if (type == DataType::Type::kInt32) { + __ Subw(out, Zero, dividend); + } else { + DCHECK_EQ(type, DataType::Type::kInt64); + __ Sub(out, Zero, dividend); + } + } else if (out != dividend) { + __ Mv(out, dividend); + } + } +} + +void InstructionCodeGeneratorRISCV64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DataType::Type type = instruction->GetResultType(); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; + + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + DCHECK(second.IsConstant()); + + XRegister out = locations->Out().AsRegister<XRegister>(); + XRegister dividend = locations->InAt(0).AsRegister<XRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + int64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); + int ctz_imm = CTZ(abs_imm); + DCHECK_GE(ctz_imm, 1); // Division by +/-1 is handled by `DivRemOneOrMinusOne()`. + + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + // Calculate the negative dividend adjustment `tmp = dividend < 0 ? abs_imm - 1 : 0`. + // This adjustment is needed for rounding the division result towards zero. + if (type == DataType::Type::kInt32 || ctz_imm == 1) { + // A 32-bit dividend is sign-extended to 64-bit, so we can use the upper bits. + // And for a 64-bit division by +/-2, we need just the sign bit. + DCHECK_IMPLIES(type == DataType::Type::kInt32, ctz_imm < 32); + __ Srli(tmp, dividend, 64 - ctz_imm); + } else { + // For other 64-bit divisions, we need to replicate the sign bit. + __ Srai(tmp, dividend, 63); + __ Srli(tmp, tmp, 64 - ctz_imm); + } + // The rest of the calculation can use 64-bit operations even for 32-bit div/rem. 
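+ // Worked example: `imm == 8` (ctz_imm == 3), `dividend == -5`: tmp = 7, tmp + dividend = 2;
+ //   Div: 2 >> 3 (arithmetic) == 0 == -5 / 8 rounded towards zero;
+ //   Rem: -5 - (2 & -8) == -5 == -5 % 8.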
+ __ Add(tmp, tmp, dividend); + if (instruction->IsDiv()) { + __ Srai(out, tmp, ctz_imm); + if (imm < 0) { + __ Neg(out, out); + } + } else { + if (ctz_imm <= 11) { + __ Andi(tmp, tmp, -abs_imm); + } else { + ScratchRegisterScope srs2(GetAssembler()); + XRegister tmp2 = srs2.AllocateXRegister(); + __ Li(tmp2, -abs_imm); + __ And(tmp, tmp, tmp2); + } + __ Sub(out, dividend, tmp); + } +} + +void InstructionCodeGeneratorRISCV64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + LocationSummary* locations = instruction->GetLocations(); + XRegister dividend = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + Location second = locations->InAt(1); + int64_t imm = Int64FromConstant(second.GetConstant()); + DataType::Type type = instruction->GetResultType(); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + + // TODO: optimize with constant. + __ LoadConst64(tmp, imm); + if (instruction->IsDiv()) { + if (type == DataType::Type::kInt32) { + __ Divw(out, dividend, tmp); + } else { + __ Div(out, dividend, tmp); + } + } else { + if (type == DataType::Type::kInt32) { + __ Remw(out, dividend, tmp); + } else { + __ Rem(out, dividend, tmp); + } + } +} + +void InstructionCodeGeneratorRISCV64::GenerateDivRemIntegral(HBinaryOperation* instruction) { + DCHECK(instruction->IsDiv() || instruction->IsRem()); + DataType::Type type = instruction->GetResultType(); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; + + LocationSummary* locations = instruction->GetLocations(); + XRegister out = locations->Out().AsRegister<XRegister>(); + Location second = locations->InAt(1); + + if (second.IsConstant()) { + int64_t imm = Int64FromConstant(second.GetConstant()); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. + } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else if (IsPowerOfTwo(AbsOrMin(imm))) { + DivRemByPowerOfTwo(instruction); + } else { + DCHECK(imm <= -2 || imm >= 2); + GenerateDivRemWithAnyConstant(instruction); + } + } else { + XRegister dividend = locations->InAt(0).AsRegister<XRegister>(); + XRegister divisor = second.AsRegister<XRegister>(); + if (instruction->IsDiv()) { + if (type == DataType::Type::kInt32) { + __ Divw(out, dividend, divisor); + } else { + __ Div(out, dividend, divisor); + } + } else { + if (type == DataType::Type::kInt32) { + __ Remw(out, dividend, divisor); + } else { + __ Rem(out, dividend, divisor); + } + } + } +} + +void InstructionCodeGeneratorRISCV64::GenerateIntLongCondition(IfCondition cond, + LocationSummary* locations) { + XRegister rd = locations->Out().AsRegister<XRegister>(); + XRegister rs1 = locations->InAt(0).AsRegister<XRegister>(); + Location rs2_location = locations->InAt(1); + bool use_imm = rs2_location.IsConstant(); + int64_t imm = use_imm ? CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()) : 0; + XRegister rs2 = use_imm ? kNoXRegister : rs2_location.AsRegister<XRegister>(); + switch (cond) { + case kCondEQ: + case kCondNE: + if (!use_imm) { + __ Sub(rd, rs1, rs2); // SUB is OK here even for 32-bit comparison. + } else if (imm != 0) { + DCHECK(IsInt<12>(-imm)); + __ Addi(rd, rs1, -imm); // ADDI is OK here even for 32-bit comparison. + } // else test `rs1` directly without subtraction for `use_imm && imm == 0`. + if (cond == kCondEQ) { + __ Seqz(rd, (use_imm && imm == 0) ? 
rs1 : rd); + } else { + __ Snez(rd, (use_imm && imm == 0) ? rs1 : rd); + } + break; + + case kCondLT: + case kCondGE: + if (use_imm) { + DCHECK(IsInt<12>(imm)); + __ Slti(rd, rs1, imm); + } else { + __ Slt(rd, rs1, rs2); + } + if (cond == kCondGE) { + // Calculate `rs1 >= rhs` as `!(rs1 < rhs)` since there's only the SLT but no SGE. + __ Xori(rd, rd, 1); + } + break; + + case kCondLE: + case kCondGT: + if (use_imm) { + // Calculate `rs1 <= imm` as `rs1 < imm + 1`. + DCHECK(IsInt<12>(imm + 1)); // The value that overflows would fail this check. + __ Slti(rd, rs1, imm + 1); + } else { + __ Slt(rd, rs2, rs1); + } + if ((cond == kCondGT) == use_imm) { + // Calculate `rs1 > imm` as `!(rs1 < imm + 1)` and calculate + // `rs1 <= rs2` as `!(rs2 < rs1)` since there's only the SLT but no SGE. + __ Xori(rd, rd, 1); + } + break; + + case kCondB: + case kCondAE: + if (use_imm) { + // Sltiu sign-extends its 12-bit immediate operand before the comparison + // and thus lets us compare directly with unsigned values in the ranges + // [0, 0x7ff] and [0x[ffffffff]fffff800, 0x[ffffffff]ffffffff]. + DCHECK(IsInt<12>(imm)); + __ Sltiu(rd, rs1, imm); + } else { + __ Sltu(rd, rs1, rs2); + } + if (cond == kCondAE) { + // Calculate `rs1 AE rhs` as `!(rs1 B rhs)` since there's only the SLTU but no SGEU. + __ Xori(rd, rd, 1); + } + break; + + case kCondBE: + case kCondA: + if (use_imm) { + // Calculate `rs1 BE imm` as `rs1 B imm + 1`. + // Sltiu sign-extends its 12-bit immediate operand before the comparison + // and thus lets us compare directly with unsigned values in the ranges + // [0, 0x7ff] and [0x[ffffffff]fffff800, 0x[ffffffff]ffffffff]. + DCHECK(IsInt<12>(imm + 1)); // The value that overflows would fail this check. + __ Sltiu(rd, rs1, imm + 1); + } else { + __ Sltu(rd, rs2, rs1); + } + if ((cond == kCondA) == use_imm) { + // Calculate `rs1 A imm` as `!(rs1 B imm + 1)` and calculate + // `rs1 BE rs2` as `!(rs2 B rs1)` since there's only the SLTU but no SGEU. 
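+ // (The `(cond == kCondA) == use_imm` check above selects exactly these two cases:
+ // `A` with an immediate and `BE` with a register operand.)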
+ __ Xori(rd, rd, 1); + } + break; + } +} + +void InstructionCodeGeneratorRISCV64::GenerateIntLongCompareAndBranch(IfCondition cond, + LocationSummary* locations, + Riscv64Label* label) { + XRegister left = locations->InAt(0).AsRegister<XRegister>(); + Location right_location = locations->InAt(1); + if (right_location.IsConstant()) { + DCHECK_EQ(CodeGenerator::GetInt64ValueOf(right_location.GetConstant()), 0); + switch (cond) { + case kCondEQ: + case kCondBE: // <= 0 if zero + __ Beqz(left, label); + break; + case kCondNE: + case kCondA: // > 0 if non-zero + __ Bnez(left, label); + break; + case kCondLT: + __ Bltz(left, label); + break; + case kCondGE: + __ Bgez(left, label); + break; + case kCondLE: + __ Blez(left, label); + break; + case kCondGT: + __ Bgtz(left, label); + break; + case kCondB: // always false + break; + case kCondAE: // always true + __ J(label); + break; + } + } else { + XRegister right_reg = right_location.AsRegister<XRegister>(); + switch (cond) { + case kCondEQ: + __ Beq(left, right_reg, label); + break; + case kCondNE: + __ Bne(left, right_reg, label); + break; + case kCondLT: + __ Blt(left, right_reg, label); + break; + case kCondGE: + __ Bge(left, right_reg, label); + break; + case kCondLE: + __ Ble(left, right_reg, label); + break; + case kCondGT: + __ Bgt(left, right_reg, label); + break; + case kCondB: + __ Bltu(left, right_reg, label); + break; + case kCondAE: + __ Bgeu(left, right_reg, label); + break; + case kCondBE: + __ Bleu(left, right_reg, label); + break; + case kCondA: + __ Bgtu(left, right_reg, label); + break; + } + } +} + +void InstructionCodeGeneratorRISCV64::GenerateFpCondition(IfCondition cond, + bool gt_bias, + DataType::Type type, + LocationSummary* locations, + Riscv64Label* label) { + // RISCV-V FP compare instructions yield the following values: + // l<r l=r l>r Unordered + // FEQ l,r 0 1 0 0 + // FLT l,r 1 0 0 0 + // FLT r,l 0 0 1 0 + // FLE l,r 1 1 0 0 + // FLE r,l 0 1 1 0 + // + // We can calculate the `Compare` results using the following formulas: + // l<r l=r l>r Unordered + // Compare/gt_bias -1 0 1 1 = ((FLE l,r) ^ 1) - (FLT l,r) + // Compare/lt_bias -1 0 1 -1 = ((FLE r,l) - 1) + (FLT r,l) + // These are emitted in `VisitCompare()`. + // + // This function emits a fused `Condition(Compare(., .), 0)`. If we compare the + // `Compare` results above with 0, we get the following values and formulas: + // l<r l=r l>r Unordered + // CondEQ/- 0 1 0 0 = (FEQ l, r) + // CondNE/- 1 0 1 1 = (FEQ l, r) ^ 1 + // CondLT/gt_bias 1 0 0 0 = (FLT l,r) + // CondLT/lt_bias 1 0 0 1 = (FLE r,l) ^ 1 + // CondLE/gt_bias 1 1 0 0 = (FLE l,r) + // CondLE/lt_bias 1 1 0 1 = (FLT r,l) ^ 1 + // CondGT/gt_bias 0 0 1 1 = (FLE l,r) ^ 1 + // CondGT/lt_bias 0 0 1 0 = (FLT r,l) + // CondGE/gt_bias 0 1 1 1 = (FLT l,r) ^ 1 + // CondGE/lt_bias 0 1 1 0 = (FLE r,l) + // (CondEQ/CondNE comparison with zero yields the same result with gt_bias and lt_bias.) + // + // If the condition is not materialized, the `^ 1` is not emitted, + // instead the condition is reversed by emitting BEQZ instead of BNEZ. + + FRegister rs1 = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rs2 = locations->InAt(1).AsFpuRegister<FRegister>(); + + DCHECK_EQ(label != nullptr, locations->Out().IsInvalid()); + ScratchRegisterScope srs(GetAssembler()); + XRegister rd = + (label != nullptr) ? 
srs.AllocateXRegister() : locations->Out().AsRegister<XRegister>(); + bool reverse_condition = false; + + switch (cond) { + case kCondEQ: + FEq(rd, rs1, rs2, type); + break; + case kCondNE: + FEq(rd, rs1, rs2, type); + reverse_condition = true; + break; + case kCondLT: + if (gt_bias) { + FLt(rd, rs1, rs2, type); + } else { + FLe(rd, rs2, rs1, type); + reverse_condition = true; + } + break; + case kCondLE: + if (gt_bias) { + FLe(rd, rs1, rs2, type); + } else { + FLt(rd, rs2, rs1, type); + reverse_condition = true; + } + break; + case kCondGT: + if (gt_bias) { + FLe(rd, rs1, rs2, type); + reverse_condition = true; + } else { + FLt(rd, rs2, rs1, type); + } + break; + case kCondGE: + if (gt_bias) { + FLt(rd, rs1, rs2, type); + reverse_condition = true; + } else { + FLe(rd, rs2, rs1, type); + } + break; + default: + LOG(FATAL) << "Unexpected floating-point condition " << cond; + UNREACHABLE(); + } + + if (label != nullptr) { + if (reverse_condition) { + __ Beqz(rd, label); + } else { + __ Bnez(rd, label); + } + } else { + if (reverse_condition) { + __ Xori(rd, rd, 1); + } + } +} + +void CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, offset, /*index=*/ Location::NoLocation(), temp, needs_null_check); +} + +void CodeGeneratorRISCV64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check) { + GenerateReferenceLoadWithBakerReadBarrier( + instruction, ref, obj, data_offset, index, temp, needs_null_check); +} + +void CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t offset, + Location index, + Location temp, + bool needs_null_check) { + // For now, use the same approach as for GC roots plus unpoison the reference if needed. + // TODO(riscv64): Implement checking if the holder is black. + UNUSED(temp); + + XRegister reg = ref.AsRegister<XRegister>(); + if (index.IsValid()) { + DCHECK(instruction->IsArrayGet()); + DCHECK(!needs_null_check); + DCHECK(index.IsRegister()); + // /* HeapReference<Object> */ ref = *(obj + index * element_size + offset) + DataType::Type type = DataType::Type::kReference; + DCHECK_EQ(type, instruction->GetType()); + instruction_visitor_.ShNAdd(reg, index.AsRegister<XRegister>(), obj, type); + __ Loadwu(reg, reg, offset); + } else { + // /* HeapReference<Object> */ ref = *(obj + offset) + __ Loadwu(reg, obj, offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + } + MaybeUnpoisonHeapReference(reg); + + // Slow path marking the reference. + XRegister tmp = RA; // Use RA as temp. It is clobbered in the slow path anyway. + SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathRISCV64( + instruction, ref, Location::RegisterLocation(tmp)); + AddSlowPath(slow_path); + + const int32_t entry_point_offset = ReadBarrierMarkEntrypointOffset(ref); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. 
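+ // A null entrypoint means GC marking is not active: the BNEZ below falls through and the
+ // reference is used as loaded; otherwise the slow path calls the mark entrypoint.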
+ __ Loadd(tmp, TR, entry_point_offset); + __ Bnez(tmp, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorRISCV64::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + UNUSED(instruction); + UNUSED(out); + UNUSED(ref); + UNUSED(obj); + UNUSED(offset); + UNUSED(index); + LOG(FATAL) << "Unimplemented"; +} + +void CodeGeneratorRISCV64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (gUseReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorRISCV64::GenerateReferenceLoadWithBakerReadBarrier). + DCHECK(!kUseBakerReadBarrier); + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + UnpoisonHeapReference(out.AsRegister<XRegister>()); + } +} + +void CodeGeneratorRISCV64::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { + DCHECK(gUseReadBarrier); + + // Insert a slow path based read barrier *after* the GC root load. + // + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCodeRISCV64* slow_path = + new (GetScopedAllocator()) ReadBarrierForRootSlowPathRISCV64(instruction, out, root); + AddSlowPath(slow_path); + + __ J(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void InstructionCodeGeneratorRISCV64::HandleGoto(HInstruction* instruction, + HBasicBlock* successor) { + if (successor->IsExitBlock()) { + DCHECK(instruction->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + + HBasicBlock* block = instruction->GetBlock(); + HInstruction* previous = instruction->GetPrevious(); + HLoopInformation* info = block->GetLoopInformation(); + + if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + codegen_->MaybeIncrementHotness(/*is_frame_entry=*/ false); + GenerateSuspendCheck(info->GetSuspendCheck(), successor); + return; // `GenerateSuspendCheck()` emitted the jump. + } + if (block->IsEntryBlock() && previous != nullptr && previous->IsSuspendCheck()) { + GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + } + if (!codegen_->GoesToNextBlock(block, successor)) { + __ J(codegen_->GetLabelOf(successor)); + } +} + +void InstructionCodeGeneratorRISCV64::GenPackedSwitchWithCompares(XRegister adjusted, + XRegister temp, + uint32_t num_entries, + HBasicBlock* switch_block) { + // Note: The `adjusted` register holds `value - lower_bound`. If the `lower_bound` is 0, + // `adjusted` is the original `value` register and we must not clobber it. Otherwise, + // `adjusted` is the `temp`. The caller already emitted the `adjusted < num_entries` check. + + // Create a set of compare/jumps. + ArrayRef<HBasicBlock* const> successors(switch_block->GetSuccessors()); + uint32_t index = 0; + for (; num_entries - index >= 2u; index += 2u) { + // Jump to `successors[index]` if `value == lower_bound + index`. + // Note that `adjusted` holds `value - lower_bound - index`. + __ Beqz(adjusted, codegen_->GetLabelOf(successors[index])); + if (num_entries - index == 2u) { + break; // The last entry shall match, so the branch shall be unconditional. 
+ } + // Jump to `successors[index + 1]` if `value == lower_bound + index + 1`. + // Modify `adjusted` to hold `value - lower_bound - index - 2` for this comparison. + __ Addi(temp, adjusted, -2); + adjusted = temp; + __ Bltz(adjusted, codegen_->GetLabelOf(successors[index + 1])); + } + // For the last entry, unconditionally jump to `successors[num_entries - 1]`. + __ J(codegen_->GetLabelOf(successors[num_entries - 1u])); +} + +void InstructionCodeGeneratorRISCV64::GenTableBasedPackedSwitch(XRegister adjusted, + XRegister temp, + uint32_t num_entries, + HBasicBlock* switch_block) { + // Note: The `adjusted` register holds `value - lower_bound`. If the `lower_bound` is 0, + // `adjusted` is the original `value` register and we must not clobber it. Otherwise, + // `adjusted` is the `temp`. The caller already emitted the `adjusted < num_entries` check. + + // Create a jump table. + ArenaVector<Riscv64Label*> labels(num_entries, + __ GetAllocator()->Adapter(kArenaAllocSwitchTable)); + const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors(); + for (uint32_t i = 0; i < num_entries; i++) { + labels[i] = codegen_->GetLabelOf(successors[i]); + } + JumpTable* table = __ CreateJumpTable(std::move(labels)); + + // Load the address of the jump table. + // Note: The `LoadLabelAddress()` emits AUIPC+ADD. It is possible to avoid the ADD and + // instead embed that offset in the LW below as well as all jump table entries but + // that would need some invasive changes in the jump table handling in the assembler. + ScratchRegisterScope srs(GetAssembler()); + XRegister table_base = srs.AllocateXRegister(); + __ LoadLabelAddress(table_base, table->GetLabel()); + + // Load the PC difference from the jump table. + // TODO(riscv64): Use SH2ADD from the Zba extension. + __ Slli(temp, adjusted, 2); + __ Add(temp, temp, table_base); + __ Lw(temp, temp, 0); + + // Compute the absolute target address by adding the table start address + // (the table contains offsets to targets relative to its start). + __ Add(temp, temp, table_base); + // And jump. + __ Jr(temp); +} + +int32_t InstructionCodeGeneratorRISCV64::VecAddress(LocationSummary* locations, + size_t size, + /*out*/ XRegister* adjusted_base) { + UNUSED(locations); + UNUSED(size); + UNUSED(adjusted_base); + LOG(FATAL) << "Unimplemented"; + UNREACHABLE(); +} + +void InstructionCodeGeneratorRISCV64::GenConditionalMove(HSelect* select) { + UNUSED(select); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::HandleBinaryOp(HBinaryOperation* instruction) { + DCHECK_EQ(instruction->InputCount(), 2u); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DataType::Type type = instruction->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + locations->SetInAt(0, Location::RequiresRegister()); + HInstruction* right = instruction->InputAt(1); + bool can_use_imm = false; + if (instruction->IsMin() || instruction->IsMax()) { + can_use_imm = IsZeroBitPattern(instruction); + } else if (right->IsConstant()) { + int64_t imm = CodeGenerator::GetInt64ValueOf(right->AsConstant()); + can_use_imm = IsInt<12>(instruction->IsSub() ? 
-imm : imm); + } + if (can_use_imm) { + locations->SetInAt(1, Location::ConstantLocation(right)); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + if (instruction->IsMin() || instruction->IsMax()) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); + } else { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } + break; + + default: + LOG(FATAL) << "Unexpected " << instruction->DebugName() << " type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::HandleBinaryOp(HBinaryOperation* instruction) { + DataType::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + XRegister rd = locations->Out().AsRegister<XRegister>(); + XRegister rs1 = locations->InAt(0).AsRegister<XRegister>(); + Location rs2_location = locations->InAt(1); + + bool use_imm = rs2_location.IsConstant(); + XRegister rs2 = use_imm ? kNoXRegister : rs2_location.AsRegister<XRegister>(); + int64_t imm = use_imm ? CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()) : 0; + + if (instruction->IsAnd()) { + if (use_imm) { + __ Andi(rd, rs1, imm); + } else { + __ And(rd, rs1, rs2); + } + } else if (instruction->IsOr()) { + if (use_imm) { + __ Ori(rd, rs1, imm); + } else { + __ Or(rd, rs1, rs2); + } + } else if (instruction->IsXor()) { + if (use_imm) { + __ Xori(rd, rs1, imm); + } else { + __ Xor(rd, rs1, rs2); + } + } else if (instruction->IsAdd() || instruction->IsSub()) { + if (type == DataType::Type::kInt32) { + if (use_imm) { + __ Addiw(rd, rs1, instruction->IsSub() ? -imm : imm); + } else if (instruction->IsAdd()) { + __ Addw(rd, rs1, rs2); + } else { + DCHECK(instruction->IsSub()); + __ Subw(rd, rs1, rs2); + } + } else { + if (use_imm) { + __ Addi(rd, rs1, instruction->IsSub() ? -imm : imm); + } else if (instruction->IsAdd()) { + __ Add(rd, rs1, rs2); + } else { + DCHECK(instruction->IsSub()); + __ Sub(rd, rs1, rs2); + } + } + } else if (instruction->IsMin()) { + DCHECK_IMPLIES(use_imm, imm == 0); + __ Min(rd, rs1, use_imm ? Zero : rs2); + } else { + DCHECK(instruction->IsMax()); + DCHECK_IMPLIES(use_imm, imm == 0); + __ Max(rd, rs1, use_imm ? Zero : rs2); + } + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FRegister rd = locations->Out().AsFpuRegister<FRegister>(); + FRegister rs1 = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rs2 = locations->InAt(1).AsFpuRegister<FRegister>(); + if (instruction->IsAdd()) { + FAdd(rd, rs1, rs2, type); + } else if (instruction->IsSub()) { + FSub(rd, rs1, rs2, type); + } else { + DCHECK(instruction->IsMin() || instruction->IsMax()); + // If one of the operands is NaN and the other is not, riscv64 instructions FMIN/FMAX + // return the other operand while we want to return the NaN operand. + DCHECK_NE(rd, rs1); // Requested `Location::kOutputOverlap`. + DCHECK_NE(rd, rs2); // Requested `Location::kOutputOverlap`. + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + Riscv64Label done; + // Return `rs1` if it's NaN. 
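+ // FCLASS sets a single class bit; the two NaN classes occupy the highest bit positions,
+ // so the unsigned `Bgeu` against kFClassNaNMinValue below detects any NaN.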
+ FClass(tmp, rs1, type); + __ Li(tmp2, kFClassNaNMinValue); + FMv(rd, rs1, type); + __ Bgeu(tmp, tmp2, &done); + // Return `rs2` if it's NaN. + FClass(tmp, rs2, type); + FMv(rd, rs2, type); + __ Bgeu(tmp, tmp2, &done); + // Calculate Min/Max for non-NaN arguments. + if (instruction->IsMin()) { + FMin(rd, rs1, rs2, type); + } else { + FMax(rd, rs1, rs2, type); + } + __ Bind(&done); + } + break; + } + default: + LOG(FATAL) << "Unexpected binary operation type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::HandleCondition(HCondition* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->InputAt(0)->GetType()) { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + break; + + default: { + locations->SetInAt(0, Location::RequiresRegister()); + HInstruction* rhs = instruction->InputAt(1); + bool use_imm = false; + if (rhs->IsConstant()) { + int64_t imm = CodeGenerator::GetInt64ValueOf(rhs->AsConstant()); + if (instruction->IsEmittedAtUseSite()) { + // For `HIf`, materialize all non-zero constants with an `HParallelMove`. + // Note: For certain constants and conditions, the code could be improved. + // For example, 2048 takes two instructions to materialize but the negative + // -2048 could be embedded in ADDI for EQ/NE comparison. + use_imm = (imm == 0); + } else { + // Constants that cannot be embedded in an instruction's 12-bit immediate shall be + // materialized with an `HParallelMove`. This simplifies the code and avoids cases + // with arithmetic overflow. Adjust the `imm` if needed for a particular instruction. + switch (instruction->GetCondition()) { + case kCondEQ: + case kCondNE: + imm = -imm; // ADDI with negative immediate (there is no SUBI). + break; + case kCondLE: + case kCondGT: + case kCondBE: + case kCondA: + imm += 1; // SLTI/SLTIU with adjusted immediate (there is no SLEI/SLEIU). + break; + default: + break; + } + use_imm = IsInt<12>(imm); + } + } + if (use_imm) { + locations->SetInAt(1, Location::ConstantLocation(rhs)); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + break; + } + } + if (!instruction->IsEmittedAtUseSite()) { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } +} + +void InstructionCodeGeneratorRISCV64::HandleCondition(HCondition* instruction) { + if (instruction->IsEmittedAtUseSite()) { + return; + } + + DataType::Type type = instruction->InputAt(0)->GetType(); + LocationSummary* locations = instruction->GetLocations(); + switch (type) { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + GenerateFpCondition(instruction->GetCondition(), instruction->IsGtBias(), type, locations); + return; + default: + // Integral types and reference equality. 
+ GenerateIntLongCondition(instruction->GetCondition(), locations); + return; + } +} + +void LocationsBuilderRISCV64::HandleShift(HBinaryOperation* instruction) { + DCHECK(instruction->IsShl() || + instruction->IsShr() || + instruction->IsUShr() || + instruction->IsRor()); + + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DataType::Type type = instruction->GetResultType(); + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + } + default: + LOG(FATAL) << "Unexpected shift type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::HandleShift(HBinaryOperation* instruction) { + DCHECK(instruction->IsShl() || + instruction->IsShr() || + instruction->IsUShr() || + instruction->IsRor()); + LocationSummary* locations = instruction->GetLocations(); + DataType::Type type = instruction->GetType(); + + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + XRegister rd = locations->Out().AsRegister<XRegister>(); + XRegister rs1 = locations->InAt(0).AsRegister<XRegister>(); + Location rs2_location = locations->InAt(1); + + if (rs2_location.IsConstant()) { + int64_t imm = CodeGenerator::GetInt64ValueOf(rs2_location.GetConstant()); + uint32_t shamt = + imm & (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance); + + if (shamt == 0) { + if (rd != rs1) { + __ Mv(rd, rs1); + } + } else if (type == DataType::Type::kInt32) { + if (instruction->IsShl()) { + __ Slliw(rd, rs1, shamt); + } else if (instruction->IsShr()) { + __ Sraiw(rd, rs1, shamt); + } else if (instruction->IsUShr()) { + __ Srliw(rd, rs1, shamt); + } else { + DCHECK(instruction->IsRor()); + __ Roriw(rd, rs1, shamt); + } + } else { + if (instruction->IsShl()) { + __ Slli(rd, rs1, shamt); + } else if (instruction->IsShr()) { + __ Srai(rd, rs1, shamt); + } else if (instruction->IsUShr()) { + __ Srli(rd, rs1, shamt); + } else { + DCHECK(instruction->IsRor()); + __ Rori(rd, rs1, shamt); + } + } + } else { + XRegister rs2 = rs2_location.AsRegister<XRegister>(); + if (type == DataType::Type::kInt32) { + if (instruction->IsShl()) { + __ Sllw(rd, rs1, rs2); + } else if (instruction->IsShr()) { + __ Sraw(rd, rs1, rs2); + } else if (instruction->IsUShr()) { + __ Srlw(rd, rs1, rs2); + } else { + DCHECK(instruction->IsRor()); + __ Rorw(rd, rs1, rs2); + } + } else { + if (instruction->IsShl()) { + __ Sll(rd, rs1, rs2); + } else if (instruction->IsShr()) { + __ Sra(rd, rs1, rs2); + } else if (instruction->IsUShr()) { + __ Srl(rd, rs1, rs2); + } else { + DCHECK(instruction->IsRor()); + __ Ror(rd, rs1, rs2); + } + } + } + break; + } + default: + LOG(FATAL) << "Unexpected shift operation type " << type; + } +} + +void CodeGeneratorRISCV64::MarkGCCard(XRegister object, + XRegister value, + bool value_can_be_null) { + Riscv64Label done; + ScratchRegisterScope srs(GetAssembler()); + XRegister card = srs.AllocateXRegister(); + XRegister temp = srs.AllocateXRegister(); + if (value_can_be_null) { + __ Beqz(value, &done); + } + // Load the address of the card table into `card`. + __ Loadd(card, TR, Thread::CardTableOffset<kRiscv64PointerSize>().Int32Value()); + + // Calculate the address of the card corresponding to `object`. 
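+ // card_address = (biased) card_table_base + (object >> kCardShift), i.e. one card byte per
+ // 2^kCardShift bytes of heap.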
+ __ Srli(temp, object, gc::accounting::CardTable::kCardShift); + __ Add(temp, card, temp); + // Write the `art::gc::accounting::CardTable::kCardDirty` value into the + // `object`'s card. + // + // Register `card` contains the address of the card table. Note that the card + // table's base is biased during its creation so that it always starts at an + // address whose least-significant byte is equal to `kCardDirty` (see + // art::gc::accounting::CardTable::Create). Therefore the SB instruction + // below writes the `kCardDirty` (byte) value into the `object`'s card + // (located at `card + object >> kCardShift`). + // + // This dual use of the value in register `card` (1. to calculate the location + // of the card to mark; and 2. to load the `kCardDirty` value) saves a load + // (no need to explicitly load `kCardDirty` as an immediate value). + __ Storeb(card, temp, 0); + if (value_can_be_null) { + __ Bind(&done); + } +} + +void LocationsBuilderRISCV64::HandleFieldSet(HInstruction* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, ValueLocationForStore(instruction->InputAt(1))); +} + +void InstructionCodeGeneratorRISCV64::HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + bool value_can_be_null, + WriteBarrierKind write_barrier_kind) { + DataType::Type type = field_info.GetFieldType(); + LocationSummary* locations = instruction->GetLocations(); + XRegister obj = locations->InAt(0).AsRegister<XRegister>(); + Location value = locations->InAt(1); + DCHECK_IMPLIES(value.IsConstant(), IsZeroBitPattern(value.GetConstant())); + bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + bool is_predicated = + instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet(); + + Riscv64Label pred_is_null; + if (is_predicated) { + __ Beqz(obj, &pred_is_null); + } + + if (is_volatile) { + if (DataType::Size(type) >= 4u) { + // Use AMOSWAP for 32-bit and 64-bit data types. + ScratchRegisterScope srs(GetAssembler()); + XRegister swap_src = kNoXRegister; + if (kPoisonHeapReferences && type == DataType::Type::kReference && !value.IsConstant()) { + swap_src = srs.AllocateXRegister(); + __ Mv(swap_src, value.AsRegister<XRegister>()); + codegen_->PoisonHeapReference(swap_src); + } else if (type == DataType::Type::kFloat64 && !value.IsConstant()) { + swap_src = srs.AllocateXRegister(); + __ FMvXD(swap_src, value.AsFpuRegister<FRegister>()); + } else if (type == DataType::Type::kFloat32 && !value.IsConstant()) { + swap_src = srs.AllocateXRegister(); + __ FMvXW(swap_src, value.AsFpuRegister<FRegister>()); + } else { + swap_src = InputXRegisterOrZero(value); + } + XRegister addr = srs.AllocateXRegister(); + __ AddConst64(addr, obj, offset); + if (DataType::Is64BitType(type)) { + __ AmoSwapD(Zero, swap_src, addr, AqRl::kRelease); + } else { + __ AmoSwapW(Zero, swap_src, addr, AqRl::kRelease); + } + } else { + // Use fences for smaller data types. 
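+ // An any-store barrier before the plain store plus an any-any barrier after it give the
+ // volatile store its ordering, mirroring the release AMOSWAP used for wider types above.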
+ codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + Store(value, obj, offset, type); + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + } else { + Store(value, obj, offset, type); + } + + if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1)) && + write_barrier_kind != WriteBarrierKind::kDontEmit) { + codegen_->MarkGCCard( + obj, + value.AsRegister<XRegister>(), + value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitWithNullCheck); + } + + if (is_predicated) { + __ Bind(&pred_is_null); + } +} + +void LocationsBuilderRISCV64::HandleFieldGet(HInstruction* instruction) { + DCHECK(instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsPredicatedInstanceFieldGet()); + + bool is_predicated = instruction->IsPredicatedInstanceFieldGet(); + + bool object_field_get_with_read_barrier = + gUseReadBarrier && (instruction->GetType() == DataType::Type::kReference); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, + object_field_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); + + // Input for object receiver. + locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister()); + + if (DataType::IsFloatingPointType(instruction->GetType())) { + if (is_predicated) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + } else { + locations->SetOut(Location::RequiresFpuRegister()); + } + } else { + if (is_predicated) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::SameAsFirstInput()); + } else { + // The output overlaps for an object field get when read barriers + // are enabled: we do not want the load to overwrite the object's + // location, as we need it to emit the read barrier. + locations->SetOut(Location::RequiresRegister(), + object_field_get_with_read_barrier ? Location::kOutputOverlap + : Location::kNoOutputOverlap); + } + } + + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorRISCV64::HandleFieldGet(HInstruction* instruction, + const FieldInfo& field_info) { + DCHECK(instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsPredicatedInstanceFieldGet()); + DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); + DataType::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0); + XRegister obj = obj_loc.AsRegister<XRegister>(); + Location dst_loc = locations->Out(); + bool is_volatile = field_info.IsVolatile(); + uint32_t offset = field_info.GetFieldOffset().Uint32Value(); + + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + + if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) { + // /* HeapReference<Object> */ dst = *(obj + offset) + Location temp_loc = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorRISCV64::GenerateFieldLoadWithBakerReadBarrier call. 
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + dst_loc, + obj, + offset, + temp_loc, + /* needs_null_check= */ true); + } else { + Load(dst_loc, obj, offset, type); + } + + if (is_volatile) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + + if (type == DataType::Type::kReference && !(gUseReadBarrier && kUseBakerReadBarrier)) { + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset); + } +} + +void LocationsBuilderRISCV64::VisitAbove(HAbove* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitAbove(HAbove* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitAboveOrEqual(HAboveOrEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitAboveOrEqual(HAboveOrEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitAbs(HAbs* abs) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitAbs(HAbs* abs) { + LocationSummary* locations = abs->GetLocations(); + switch (abs->GetResultType()) { + case DataType::Type::kInt32: { + XRegister in = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Sraiw(tmp, in, 31); + __ Xor(out, in, tmp); + __ Subw(out, out, tmp); + break; + } + case DataType::Type::kInt64: { + XRegister in = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Srai(tmp, in, 63); + __ Xor(out, in, tmp); + __ Sub(out, out, tmp); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + FAbs(locations->Out().AsFpuRegister<FRegister>(), + locations->InAt(0).AsFpuRegister<FRegister>(), + abs->GetResultType()); + break; + default: + LOG(FATAL) << "Unexpected abs type " << abs->GetResultType(); + } +} + +void LocationsBuilderRISCV64::VisitAdd(HAdd* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitAdd(HAdd* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitAnd(HAnd* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitAnd(HAnd* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitArrayGet(HArrayGet* instruction) { + DataType::Type type = instruction->GetType(); + bool object_array_get_with_read_barrier = gUseReadBarrier && (type == DataType::Type::kReference); + LocationSummary* locations = new (GetGraph()->GetAllocator()) + 
LocationSummary(instruction, + object_array_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (DataType::IsFloatingPointType(type)) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } else { + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut( + Location::RequiresRegister(), + object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); + } + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorRISCV64::GenerateArrayLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorRISCV64::VisitArrayGet(HArrayGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); + XRegister obj = obj_loc.AsRegister<XRegister>(); + Location out_loc = locations->Out(); + Location index = locations->InAt(1); + uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); + DataType::Type type = instruction->GetType(); + const bool maybe_compressed_char_at = + mirror::kUseStringCompression && instruction->IsStringCharAt(); + + Riscv64Label string_char_at_done; + if (maybe_compressed_char_at) { + DCHECK_EQ(type, DataType::Type::kUint16); + uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); + Riscv64Label uncompressed_load; + { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, obj, count_offset); + __ Andi(tmp, tmp, 0x1); + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ Bnez(tmp, &uncompressed_load); + } + XRegister out = out_loc.AsRegister<XRegister>(); + if (index.IsConstant()) { + int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); + __ Loadbu(out, obj, data_offset + const_index); + } else { + __ Add(out, obj, index.AsRegister<XRegister>()); + __ Loadbu(out, out, data_offset); + } + __ J(&string_char_at_done); + __ Bind(&uncompressed_load); + } + + if (type == DataType::Type::kReference && gUseReadBarrier && kUseBakerReadBarrier) { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + // Note that a potential implicit null check could be handled in these + // `CodeGeneratorRISCV64::Generate{Array,Field}LoadWithBakerReadBarrier()` calls + // but we currently do not support implicit null checks on `HArrayGet`. + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + Location temp = locations->GetTemp(0); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. 
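+ // For example, a reference element at constant index 3 yields
+ // offset = (3 << 2) + data_offset, since heap references are 4 bytes wide
+ // (see the static_assert above).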
+ static constexpr size_t shift = DataType::SizeShift(DataType::Type::kReference); + size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << shift) + data_offset; + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + offset, + temp, + /* needs_null_check= */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + index, + temp, + /* needs_null_check= */ false); + } + } else if (index.IsConstant()) { + int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); + int32_t offset = data_offset + (const_index << DataType::SizeShift(type)); + Load(out_loc, obj, offset, type); + if (type == DataType::Type::kReference) { + DCHECK(!(gUseReadBarrier && kUseBakerReadBarrier)); + // If read barriers are enabled, emit read barriers other than Baker's using + // a slow path (and also unpoison the loaded reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } + } else { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + ShNAdd(tmp, index.AsRegister<XRegister>(), obj, type); + Load(out_loc, tmp, data_offset, type); + if (type == DataType::Type::kReference) { + DCHECK(!(gUseReadBarrier && kUseBakerReadBarrier)); + // If read barriers are enabled, emit read barriers other than Baker's using + // a slow path (and also unpoison the loaded reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow( + instruction, out_loc, out_loc, obj_loc, data_offset, index); + } + } + + if (maybe_compressed_char_at) { + __ Bind(&string_char_at_done); + } +} + +void LocationsBuilderRISCV64::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorRISCV64::VisitArrayLength(HArrayLength* instruction) { + LocationSummary* locations = instruction->GetLocations(); + uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); + XRegister obj = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + __ Loadwu(out, obj, offset); // Unsigned for string length; does not matter for other arrays. + codegen_->MaybeRecordImplicitNullCheck(instruction); + // Mask out compression flag from String's array length. + if (mirror::kUseStringCompression && instruction->IsStringLength()) { + __ Srli(out, out, 1u); + } +} + +void LocationsBuilderRISCV64::VisitArraySet(HArraySet* instruction) { + bool needs_type_check = instruction->NeedsTypeCheck(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, + needs_type_check ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetInAt(2, ValueLocationForStore(instruction->GetValue())); +} + +void InstructionCodeGeneratorRISCV64::VisitArraySet(HArraySet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XRegister array = locations->InAt(0).AsRegister<XRegister>(); + Location index = locations->InAt(1); + Location value = locations->InAt(2); + DataType::Type value_type = instruction->GetComponentType(); + bool needs_type_check = instruction->NeedsTypeCheck(); + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + size_t data_offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value(); + SlowPathCodeRISCV64* slow_path = nullptr; + + if (needs_write_barrier) { + DCHECK_EQ(value_type, DataType::Type::kReference); + DCHECK(!value.IsConstant()); + Riscv64Label do_store; + + bool can_value_be_null = instruction->GetValueCanBeNull(); + if (can_value_be_null) { + __ Beqz(value.AsRegister<XRegister>(), &do_store); + } + + if (needs_type_check) { + slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + + ScratchRegisterScope srs(GetAssembler()); + XRegister temp1 = srs.AllocateXRegister(); + XRegister temp2 = srs.AllocateXRegister(); + + // Note that when read barriers are enabled, the type checks are performed + // without read barriers. This is fine, even in the case where a class object + // is in the from-space after the flip, as a comparison involving such a type + // would not produce a false positive; it may of course produce a false + // negative, in which case we would take the ArraySet slow path. + + // /* HeapReference<Class> */ temp1 = array->klass_ + __ Loadwu(temp1, array, class_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + codegen_->MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp2 = temp1->component_type_ + __ Loadwu(temp2, temp1, component_offset); + // /* HeapReference<Class> */ temp1 = value->klass_ + __ Loadwu(temp1, value.AsRegister<XRegister>(), class_offset); + // If heap poisoning is enabled, no need to unpoison `temp1` + // nor `temp2`, as we are comparing two poisoned references. + if (instruction->StaticTypeOfArrayIsObjectArray()) { + Riscv64Label do_put; + __ Beq(temp1, temp2, &do_put); + // If heap poisoning is enabled, the `temp2` reference has + // not been unpoisoned yet; unpoison it now. + codegen_->MaybeUnpoisonHeapReference(temp2); + + // /* HeapReference<Class> */ temp1 = temp2->super_class_ + __ Loadwu(temp1, temp2, super_offset); + // If heap poisoning is enabled, no need to unpoison + // `temp1`, as we are comparing against null below. 
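+ // A non-null super class means the component type is not java.lang.Object, so the
+ // assignability of `value` is not statically known and the slow path must check it;
+ // a null super class means this is effectively an Object[] and any reference may be stored.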
+ __ Bnez(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ Bne(temp1, temp2, slow_path->GetEntryLabel()); + } + } + + if (instruction->GetWriteBarrierKind() != WriteBarrierKind::kDontEmit) { + DCHECK_EQ(instruction->GetWriteBarrierKind(), WriteBarrierKind::kEmitNoNullCheck) + << " Already null checked so we shouldn't do it again."; + codegen_->MarkGCCard(array, value.AsRegister<XRegister>(), /* emit_null_check= */ false); + } + + if (can_value_be_null) { + __ Bind(&do_store); + } + } + + if (index.IsConstant()) { + int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue(); + int32_t offset = data_offset + (const_index << DataType::SizeShift(value_type)); + Store(value, array, offset, value_type); + } else { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + ShNAdd(tmp, index.AsRegister<XRegister>(), array, value_type); + Store(value, tmp, data_offset, value_type); + } + // There must be no instructions between the `Store()` and the `MaybeRecordImplicitNullCheck()`. + // We can avoid this if the type check makes the null check unconditionally. + DCHECK_IMPLIES(needs_type_check, needs_write_barrier); + if (!(needs_type_check && !instruction->GetValueCanBeNull())) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); + } +} + +void LocationsBuilderRISCV64::VisitBelow(HBelow* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitBelow(HBelow* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitBelowOrEqual(HBelowOrEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitBelowOrEqual(HBelowOrEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitBooleanNot(HBooleanNot* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorRISCV64::VisitBooleanNot(HBooleanNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + __ Xori(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>(), 1); +} + +void LocationsBuilderRISCV64::VisitBoundsCheck(HBoundsCheck* instruction) { + RegisterSet caller_saves = RegisterSet::Empty(); + InvokeRuntimeCallingConvention calling_convention; + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); + + HInstruction* index = instruction->InputAt(0); + HInstruction* length = instruction->InputAt(1); + + bool const_index = false; + bool const_length = false; + + if (length->IsConstant()) { + if (index->IsConstant()) { + const_index = true; + const_length = true; + } else { + int32_t length_value = length->AsIntConstant()->GetValue(); + if (length_value == 0 || length_value == 1) { + const_length = true; + } + } + } else if (index->IsConstant()) { + int32_t index_value = index->AsIntConstant()->GetValue(); + if (index_value <= 0) { + const_index = true; + } + } + + locations->SetInAt( + 0, + const_index ? 
Location::ConstantLocation(index) : Location::RequiresRegister()); + locations->SetInAt( + 1, + const_length ? Location::ConstantLocation(length) : Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitBoundsCheck(HBoundsCheck* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location index_loc = locations->InAt(0); + Location length_loc = locations->InAt(1); + + if (length_loc.IsConstant()) { + int32_t length = length_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index_loc.IsConstant()) { + int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index < 0 || index >= length) { + BoundsCheckSlowPathRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + __ J(slow_path->GetEntryLabel()); + } else { + // Nothing to be done. + } + return; + } + + BoundsCheckSlowPathRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + XRegister index = index_loc.AsRegister<XRegister>(); + if (length == 0) { + __ J(slow_path->GetEntryLabel()); + } else { + DCHECK_EQ(length, 1); + __ Bnez(index, slow_path->GetEntryLabel()); + } + } else { + XRegister length = length_loc.AsRegister<XRegister>(); + BoundsCheckSlowPathRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + if (index_loc.IsConstant()) { + int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index < 0) { + __ J(slow_path->GetEntryLabel()); + } else { + DCHECK_EQ(index, 0); + __ Blez(length, slow_path->GetEntryLabel()); + } + } else { + XRegister index = index_loc.AsRegister<XRegister>(); + __ Bgeu(index, length, slow_path->GetEntryLabel()); + } + } +} + +void LocationsBuilderRISCV64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorRISCV64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { + // Nothing to do, this should be removed during prepare for register allocator. + LOG(FATAL) << "Unreachable"; +} + +// Temp is used for read barrier. +static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { + if (gUseReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + return 1; + } + return 0; +} + +// Interface case has 3 temps, one for holding the number of interfaces, one for the current +// interface pointer, one for loading the current interface. +// The other checks have one temp for loading the object's class and maybe a temp for read barrier. 
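+// For example, an exact check without read barriers needs only the single temp holding the
+// object's class, while an interface check always reserves all three temps.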
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+ if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+ return 3;
+ }
+
+ return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
+
+void LocationsBuilderRISCV64::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
+ LocationSummary* locations =
+ new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
+ locations->SetInAt(0, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitCheckCast(HCheckCast* instruction) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ LocationSummary* locations = instruction->GetLocations();
+ Location obj_loc = locations->InAt(0);
+ XRegister obj = obj_loc.AsRegister<XRegister>();
+ Location cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? Location::NoLocation()
+ : locations->InAt(1);
+ Location temp_loc = locations->GetTemp(0);
+ XRegister temp = temp_loc.AsRegister<XRegister>();
+ const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ DCHECK_GE(num_temps, 1u);
+ DCHECK_LE(num_temps, 3u);
+ Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+ Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
+ const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+ const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+ const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+ const uint32_t object_array_data_offset =
+ mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+ Riscv64Label done;
+
+ bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
+ SlowPathCodeRISCV64* slow_path =
+ new (codegen_->GetScopedAllocator()) TypeCheckSlowPathRISCV64(
+ instruction, is_type_check_slow_path_fatal);
+ codegen_->AddSlowPath(slow_path);
+
+ // Avoid this check if we know `obj` is not null.
+ if (instruction->MustDoNullCheck()) {
+ __ Beqz(obj, &done);
+ }
+
+ switch (type_check_kind) {
+ case TypeCheckKind::kExactCheck:
+ case TypeCheckKind::kArrayCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+ // Jump to slow path for throwing the exception or doing a
+ // more involved array check.
+ __ Bne(temp, cls.AsRegister<XRegister>(), slow_path->GetEntryLabel()); + break; + } + + case TypeCheckKind::kAbstractClassCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // If the class is abstract, we eagerly fetch the super class of the + // object to avoid doing a comparison we know will fail. + Riscv64Label loop; + __ Bind(&loop); + // /* HeapReference<Class> */ temp = temp->super_class_ + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + super_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // If the class reference currently in `temp` is null, jump to the slow path to throw the + // exception. + __ Beqz(temp, slow_path->GetEntryLabel()); + // Otherwise, compare the classes. + __ Bne(temp, cls.AsRegister<XRegister>(), &loop); + break; + } + + case TypeCheckKind::kClassHierarchyCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // Walk over the class hierarchy to find a match. + Riscv64Label loop; + __ Bind(&loop); + __ Beq(temp, cls.AsRegister<XRegister>(), &done); + // /* HeapReference<Class> */ temp = temp->super_class_ + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + super_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // If the class reference currently in `temp` is null, jump to the slow path to throw the + // exception. Otherwise, jump to the beginning of the loop. + __ Bnez(temp, &loop); + __ J(slow_path->GetEntryLabel()); + break; + } + + case TypeCheckKind::kArrayObjectCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // Do an exact check. + __ Beq(temp, cls.AsRegister<XRegister>(), &done); + // Otherwise, we need to check that the object's class is a non-primitive array. + // /* HeapReference<Class> */ temp = temp->component_type_ + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + component_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // If the component type is null, jump to the slow path to throw the exception. + __ Beqz(temp, slow_path->GetEntryLabel()); + // Otherwise, the object is indeed an array, further check that this component + // type is not a primitive type. + __ Loadhu(temp, temp, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Bnez(temp, slow_path->GetEntryLabel()); + break; + } + + case TypeCheckKind::kUnresolvedCheck: + // We always go into the type check slow path for the unresolved check case. + // We cannot directly call the CheckCast runtime entry point + // without resorting to a type checking slow path here (i.e. by + // calling InvokeRuntime directly), as it would require to + // assign fixed registers for the inputs of this HInstanceOf + // instruction (following the runtime calling convention), which + // might be cluttered by the potential first read barrier + // emission at the beginning of this method. + __ J(slow_path->GetEntryLabel()); + break; + + case TypeCheckKind::kInterfaceCheck: { + // Avoid read barriers to improve performance of the fast path. We can not get false + // positives by doing this. False negatives are handled by the slow path. 
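+ // (A from-space class pointer may fail to match the to-space `cls`, but it can never
+ // spuriously match it, so a miss simply ends up in the slow path; compare the analogous
+ // reasoning in VisitArraySet above.)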
+ // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + // /* HeapReference<Class> */ temp = temp->iftable_ + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + XRegister temp2 = maybe_temp2_loc.AsRegister<XRegister>(); + XRegister temp3 = maybe_temp3_loc.AsRegister<XRegister>(); + // Iftable is never null. + __ Loadw(temp2, temp, array_length_offset); + // Loop through the iftable and check if any class matches. + Riscv64Label loop; + __ Bind(&loop); + __ Beqz(temp2, slow_path->GetEntryLabel()); + __ Lwu(temp3, temp, object_array_data_offset); + codegen_->MaybeUnpoisonHeapReference(temp3); + // Go to next interface. + __ Addi(temp, temp, 2 * kHeapReferenceSize); + __ Addi(temp2, temp2, -2); + // Compare the classes and continue the loop if they do not match. + __ Bne(temp3, cls.AsRegister<XRegister>(), &loop); + break; + } + + case TypeCheckKind::kBitstringCheck: { + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); + + GenerateBitstringTypeCheckCompare(instruction, temp); + __ Bnez(temp, slow_path->GetEntryLabel()); + break; + } + } + + __ Bind(&done); + __ Bind(slow_path->GetExitLabel()); +} + +void LocationsBuilderRISCV64::VisitClassTableGet(HClassTableGet* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorRISCV64::VisitClassTableGet(HClassTableGet* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XRegister in = locations->InAt(0).AsRegister<XRegister>(); + XRegister out = locations->Out().AsRegister<XRegister>(); + if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) { + MemberOffset method_offset = + mirror::Class::EmbeddedVTableEntryOffset(instruction->GetIndex(), kRiscv64PointerSize); + __ Loadd(out, in, method_offset.SizeValue()); + } else { + uint32_t method_offset = dchecked_integral_cast<uint32_t>( + ImTable::OffsetOfElement(instruction->GetIndex(), kRiscv64PointerSize)); + __ Loadd(out, in, mirror::Class::ImtPtrOffset(kRiscv64PointerSize).Uint32Value()); + __ Loadd(out, out, method_offset); + } +} + +static int32_t GetExceptionTlsOffset() { + return Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value(); +} + +void LocationsBuilderRISCV64::VisitClearException(HClearException* instruction) { + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); +} + +void InstructionCodeGeneratorRISCV64::VisitClearException( + [[maybe_unused]] HClearException* instruction) { + __ Stored(Zero, TR, GetExceptionTlsOffset()); +} + +void LocationsBuilderRISCV64::VisitClinitCheck(HClinitCheck* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnSlowPath); + locations->SetInAt(0, Location::RequiresRegister()); + if (instruction->HasUses()) { + locations->SetOut(Location::SameAsFirstInput()); + } + // Rely on the type initialization to save everything we need. 
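+ // (The clinit slow path presumably makes a SaveEverything runtime call, so no regular
+ // caller-save registers need to be reserved here.)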
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); +} + +void InstructionCodeGeneratorRISCV64::VisitClinitCheck(HClinitCheck* instruction) { + // We assume the class is not null. + SlowPathCodeRISCV64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathRISCV64( + instruction->GetLoadClass(), instruction); + codegen_->AddSlowPath(slow_path); + GenerateClassInitializationCheck(slow_path, + instruction->GetLocations()->InAt(0).AsRegister<XRegister>()); +} + +void LocationsBuilderRISCV64::VisitCompare(HCompare* instruction) { + DataType::Type in_type = instruction->InputAt(0)->GetType(); + + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + switch (in_type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, RegisterOrZeroBitPatternLocation(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected type for compare operation " << in_type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitCompare(HCompare* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XRegister result = locations->Out().AsRegister<XRegister>(); + DataType::Type in_type = instruction->InputAt(0)->GetType(); + + // 0 if: left == right + // 1 if: left > right + // -1 if: left < right + switch (in_type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: { + XRegister left = locations->InAt(0).AsRegister<XRegister>(); + XRegister right = InputXRegisterOrZero(locations->InAt(1)); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Slt(tmp, left, right); + __ Slt(result, right, left); + __ Sub(result, result, tmp); + break; + } + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FRegister left = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister right = locations->InAt(1).AsFpuRegister<FRegister>(); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + if (instruction->IsGtBias()) { + // ((FLE l,r) ^ 1) - (FLT l,r); see `GenerateFpCondition()`. + FLe(tmp, left, right, in_type); + FLt(result, left, right, in_type); + __ Xori(tmp, tmp, 1); + __ Sub(result, tmp, result); + } else { + // ((FLE r,l) - 1) + (FLT r,l); see `GenerateFpCondition()`. 
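+ // Worked out: l > r -> (1 - 1) + 1 = 1; l == r -> (1 - 1) + 0 = 0;
+ // l < r -> (0 - 1) + 0 = -1; unordered -> (0 - 1) + 0 = -1 (lt bias treats NaN as "less").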
+ FLe(tmp, right, left, in_type); + FLt(result, right, left, in_type); + __ Addi(tmp, tmp, -1); + __ Add(result, result, tmp); + } + break; + } + + default: + LOG(FATAL) << "Unimplemented compare type " << in_type; + } +} + +void LocationsBuilderRISCV64::VisitConstructorFence(HConstructorFence* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitConstructorFence( + [[maybe_unused]] HConstructorFence* instruction) { + codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); +} + +void LocationsBuilderRISCV64::VisitCurrentMethod(HCurrentMethod* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::RegisterLocation(kArtMethodRegister)); +} + +void InstructionCodeGeneratorRISCV64::VisitCurrentMethod( + [[maybe_unused]] HCurrentMethod* instruction) { + // Nothing to do, the method is already at its location. +} + +void LocationsBuilderRISCV64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitShouldDeoptimizeFlag( + HShouldDeoptimizeFlag* instruction) { + __ Loadw(instruction->GetLocations()->Out().AsRegister<XRegister>(), + SP, + codegen_->GetStackOffsetOfShouldDeoptimizeFlag()); +} + +void LocationsBuilderRISCV64::VisitDeoptimize(HDeoptimize* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + InvokeRuntimeCallingConvention calling_convention; + RegisterSet caller_saves = RegisterSet::Empty(); + caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetCustomSlowPathCallerSaves(caller_saves); + if (IsBooleanValueOrMaterializedCondition(instruction->InputAt(0))) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorRISCV64::VisitDeoptimize(HDeoptimize* instruction) { + SlowPathCodeRISCV64* slow_path = + deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathRISCV64>(instruction); + GenerateTestAndBranch(instruction, + /* condition_input_index= */ 0, + slow_path->GetEntryLabel(), + /* false_target= */ nullptr); +} + +void LocationsBuilderRISCV64::VisitDiv(HDiv* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected div type " << instruction->GetResultType(); + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitDiv(HDiv* instruction) { + DataType::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); + + switch (type) { + case DataType::Type::kInt32: + case 
DataType::Type::kInt64: + GenerateDivRemIntegral(instruction); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); + FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); + FDiv(dst, lhs, rhs, type); + break; + } + default: + LOG(FATAL) << "Unexpected div type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::VisitDivZeroCheck(HDivZeroCheck* instruction) { + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); + locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); +} + +void InstructionCodeGeneratorRISCV64::VisitDivZeroCheck(HDivZeroCheck* instruction) { + SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + Location value = instruction->GetLocations()->InAt(0); + + DataType::Type type = instruction->GetType(); + + if (!DataType::IsIntegralType(type)) { + LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; + UNREACHABLE(); + } + + if (value.IsConstant()) { + int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant()); + if (divisor == 0) { + __ J(slow_path->GetEntryLabel()); + } else { + // A division by a non-null constant is valid. We don't need to perform + // any check, so simply fall through. + } + } else { + __ Beqz(value.AsRegister<XRegister>(), slow_path->GetEntryLabel()); + } +} + +void LocationsBuilderRISCV64::VisitDoubleConstant(HDoubleConstant* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(instruction)); +} + +void InstructionCodeGeneratorRISCV64::VisitDoubleConstant( + [[maybe_unused]] HDoubleConstant* instruction) { + // Will be generated at use site. +} + +void LocationsBuilderRISCV64::VisitEqual(HEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitEqual(HEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitExit(HExit* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitExit([[maybe_unused]] HExit* instruction) {} + +void LocationsBuilderRISCV64::VisitFloatConstant(HFloatConstant* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::ConstantLocation(instruction)); +} + +void InstructionCodeGeneratorRISCV64::VisitFloatConstant( + [[maybe_unused]] HFloatConstant* instruction) { + // Will be generated at use site. 
+} + +void LocationsBuilderRISCV64::VisitGoto(HGoto* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitGoto(HGoto* instruction) { + HandleGoto(instruction, instruction->GetSuccessor()); +} + +void LocationsBuilderRISCV64::VisitGreaterThan(HGreaterThan* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitGreaterThan(HGreaterThan* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitIf(HIf* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + if (IsBooleanValueOrMaterializedCondition(instruction->InputAt(0))) { + locations->SetInAt(0, Location::RequiresRegister()); + } +} + +void InstructionCodeGeneratorRISCV64::VisitIf(HIf* instruction) { + HBasicBlock* true_successor = instruction->IfTrueSuccessor(); + HBasicBlock* false_successor = instruction->IfFalseSuccessor(); + Riscv64Label* true_target = codegen_->GoesToNextBlock(instruction->GetBlock(), true_successor) + ? nullptr + : codegen_->GetLabelOf(true_successor); + Riscv64Label* false_target = codegen_->GoesToNextBlock(instruction->GetBlock(), false_successor) + ? nullptr + : codegen_->GetLabelOf(false_successor); + GenerateTestAndBranch(instruction, /* condition_input_index= */ 0, true_target, false_target); +} + +void LocationsBuilderRISCV64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderRISCV64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); +} + +void LocationsBuilderRISCV64::VisitPredicatedInstanceFieldGet( + HPredicatedInstanceFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitPredicatedInstanceFieldGet( + HPredicatedInstanceFieldGet* instruction) { + Riscv64Label finish; + LocationSummary* locations = instruction->GetLocations(); + XRegister target = locations->InAt(1).AsRegister<XRegister>(); + __ Beqz(target, &finish); + HandleFieldGet(instruction, instruction->GetFieldInfo()); + __ Bind(&finish); +} + +void LocationsBuilderRISCV64::VisitInstanceOf(HInstanceOf* instruction) { + LocationSummary::CallKind call_kind = LocationSummary::kNoCall; + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + bool baker_read_barrier_slow_path = false; + switch (type_check_kind) { + case TypeCheckKind::kExactCheck: + case TypeCheckKind::kAbstractClassCheck: + case TypeCheckKind::kClassHierarchyCheck: + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; + break; + } + case TypeCheckKind::kArrayCheck: + case TypeCheckKind::kUnresolvedCheck: + case TypeCheckKind::kInterfaceCheck: + call_kind = LocationSummary::kCallOnSlowPath; + break; + case TypeCheckKind::kBitstringCheck: + break; + } + + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); + if (baker_read_barrier_slow_path) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + locations->SetInAt(0, Location::RequiresRegister()); + if (type_check_kind == TypeCheckKind::kBitstringCheck) { + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1))); + locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2))); + locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3))); + } else { + locations->SetInAt(1, Location::RequiresRegister()); + } + // The output does overlap inputs. + // Note that TypeCheckSlowPathRISCV64 uses this register too. + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); +} + +void InstructionCodeGeneratorRISCV64::VisitInstanceOf(HInstanceOf* instruction) { + TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); + LocationSummary* locations = instruction->GetLocations(); + Location obj_loc = locations->InAt(0); + XRegister obj = obj_loc.AsRegister<XRegister>(); + Location cls = (type_check_kind == TypeCheckKind::kBitstringCheck) + ? Location::NoLocation() + : locations->InAt(1); + Location out_loc = locations->Out(); + XRegister out = out_loc.AsRegister<XRegister>(); + const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + DCHECK_LE(num_temps, 1u); + Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); + Riscv64Label done; + SlowPathCodeRISCV64* slow_path = nullptr; + + // Return 0 if `obj` is null. + // Avoid this check if we know `obj` is not null. + if (instruction->MustDoNullCheck()) { + __ Mv(out, Zero); + __ Beqz(obj, &done); + } + + switch (type_check_kind) { + case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + // /* HeapReference<Class> */ out = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option); + // Classes must be equal for the instanceof to succeed. + __ Xor(out, out, cls.AsRegister<XRegister>()); + __ Seqz(out, out); + break; + } + + case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + // /* HeapReference<Class> */ out = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option); + // If the class is abstract, we eagerly fetch the super class of the + // object to avoid doing a comparison we know will fail. 
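+ // The loop below walks up the superclass chain; reaching a null super class leaves 0
+ // (the null reference itself) in `out` as the "false" result, while finding `cls` stores 1.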
+ Riscv64Label loop; + __ Bind(&loop); + // /* HeapReference<Class> */ out = out->super_class_ + GenerateReferenceLoadOneRegister( + instruction, out_loc, super_offset, maybe_temp_loc, read_barrier_option); + // If `out` is null, we use it for the result, and jump to `done`. + __ Beqz(out, &done); + __ Bne(out, cls.AsRegister<XRegister>(), &loop); + __ LoadConst32(out, 1); + break; + } + + case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + // /* HeapReference<Class> */ out = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, read_barrier_option); + // Walk over the class hierarchy to find a match. + Riscv64Label loop, success; + __ Bind(&loop); + __ Beq(out, cls.AsRegister<XRegister>(), &success); + // /* HeapReference<Class> */ out = out->super_class_ + GenerateReferenceLoadOneRegister( + instruction, out_loc, super_offset, maybe_temp_loc, read_barrier_option); + __ Bnez(out, &loop); + // If `out` is null, we use it for the result, and jump to `done`. + __ J(&done); + __ Bind(&success); + __ LoadConst32(out, 1); + break; + } + + case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); + // FIXME(riscv64): We currently have marking entrypoints for 29 registers. + // We need to either store entrypoint for register `N` in entry `N-A` where + // `A` can be up to 5 (Zero, RA, SP, GP, TP are not valid registers for + // marking), or define two more entrypoints, or request an additional temp + // from the register allocator instead of using a scratch register. + ScratchRegisterScope srs(GetAssembler()); + Location tmp = Location::RegisterLocation(srs.AllocateXRegister()); + // /* HeapReference<Class> */ tmp = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, tmp, obj_loc, class_offset, maybe_temp_loc, read_barrier_option); + // Do an exact check. + __ LoadConst32(out, 1); + __ Beq(tmp.AsRegister<XRegister>(), cls.AsRegister<XRegister>(), &done); + // Otherwise, we need to check that the object's class is a non-primitive array. + // /* HeapReference<Class> */ out = out->component_type_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, tmp, component_offset, maybe_temp_loc, read_barrier_option); + // If `out` is null, we use it for the result, and jump to `done`. + __ Beqz(out, &done); + __ Loadhu(out, out, primitive_offset); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); + __ Seqz(out, out); + break; + } + + case TypeCheckKind::kArrayCheck: { + // No read barrier since the slow path will retry upon failure. + // /* HeapReference<Class> */ out = obj->klass_ + GenerateReferenceLoadTwoRegisters( + instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, kWithoutReadBarrier); + DCHECK(locations->OnlyCallsOnSlowPath()); + slow_path = new (codegen_->GetScopedAllocator()) + TypeCheckSlowPathRISCV64(instruction, /* is_fatal= */ false); + codegen_->AddSlowPath(slow_path); + __ Bne(out, cls.AsRegister<XRegister>(), slow_path->GetEntryLabel()); + __ LoadConst32(out, 1); + break; + } + + case TypeCheckKind::kUnresolvedCheck: + case TypeCheckKind::kInterfaceCheck: { + // Note that we indeed only call on slow path, but we always go + // into the slow path for the unresolved and interface check + // cases. 
+ //
+ // We cannot directly call the InstanceofNonTrivial runtime
+ // entry point without resorting to a type checking slow path
+ // here (i.e. by calling InvokeRuntime directly), as it would
+ // require to assign fixed registers for the inputs of this
+ // HInstanceOf instruction (following the runtime calling
+ // convention), which might be cluttered by the potential first
+ // read barrier emission at the beginning of this method.
+ //
+ // TODO: Introduce a new runtime entry point taking the object
+ // to test (instead of its class) as argument, and let it deal
+ // with the read barrier issues. This will let us refactor this
+ // case of the `switch` code as it was previously (with a direct
+ // call to the runtime not using a type checking slow path).
+ // This should also be beneficial for the other cases above.
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathRISCV64(
+ instruction, /* is_fatal= */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ J(slow_path->GetEntryLabel());
+ break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(
+ instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ Seqz(out, out);
+ break;
+ }
+ }
+
+ __ Bind(&done);
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
+ }
+}
+
+void LocationsBuilderRISCV64::VisitIntConstant(HIntConstant* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetOut(Location::ConstantLocation(instruction));
+}
+
+void InstructionCodeGeneratorRISCV64::VisitIntConstant([[maybe_unused]] HIntConstant* instruction) {
+ // Will be generated at use site.
+}
+
+void LocationsBuilderRISCV64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void InstructionCodeGeneratorRISCV64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+ UNUSED(instruction);
+ LOG(FATAL) << "Unimplemented";
+}
+
+void LocationsBuilderRISCV64::VisitInvokeUnresolved(HInvokeUnresolved* instruction) {
+ // The trampoline uses the same calling convention as dex calling conventions, except
+ // instead of loading arg0/A0 with the target Method*, arg0/A0 will contain the method_idx.
+ HandleInvoke(instruction);
+}
+
+void InstructionCodeGeneratorRISCV64::VisitInvokeUnresolved(HInvokeUnresolved* instruction) {
+ codegen_->GenerateInvokeUnresolvedRuntimeCall(instruction);
+}
+
+void LocationsBuilderRISCV64::VisitInvokeInterface(HInvokeInterface* instruction) {
+ HandleInvoke(instruction);
+ // Use T0 as the hidden argument for `art_quick_imt_conflict_trampoline`.
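+ // T0 is either reserved as the last input (when the hidden argument is already an input
+ // of the invoke, i.e. kRecursive) or added as an extra temp that is filled in just before
+ // the call in the code generator below.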
+ if (instruction->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) { + instruction->GetLocations()->SetInAt(instruction->GetNumberOfArguments() - 1, + Location::RegisterLocation(T0)); + } else { + instruction->GetLocations()->AddTemp(Location::RegisterLocation(T0)); + } +} + +void InstructionCodeGeneratorRISCV64::VisitInvokeInterface(HInvokeInterface* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XRegister temp = locations->GetTemp(0).AsRegister<XRegister>(); + XRegister receiver = locations->InAt(0).AsRegister<XRegister>(); + int32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize); + + // /* HeapReference<Class> */ temp = receiver->klass_ + __ Loadwu(temp, receiver, class_offset); + codegen_->MaybeRecordImplicitNullCheck(instruction); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). + codegen_->MaybeUnpoisonHeapReference(temp); + + // If we're compiling baseline, update the inline cache. + codegen_->MaybeGenerateInlineCacheCheck(instruction, temp); + + // The register T0 is required to be used for the hidden argument in + // `art_quick_imt_conflict_trampoline`. + if (instruction->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive && + instruction->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) { + Location hidden_reg = instruction->GetLocations()->GetTemp(1); + // Load the resolved interface method in the hidden argument register T0. + DCHECK_EQ(T0, hidden_reg.AsRegister<XRegister>()); + codegen_->LoadMethod(instruction->GetHiddenArgumentLoadKind(), hidden_reg, instruction); + } + + __ Loadd(temp, temp, mirror::Class::ImtPtrOffset(kRiscv64PointerSize).Uint32Value()); + uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( + instruction->GetImtIndex(), kRiscv64PointerSize)); + // temp = temp->GetImtEntryAt(method_offset); + __ Loadd(temp, temp, method_offset); + if (instruction->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) { + // We pass the method from the IMT in case of a conflict. This will ensure + // we go into the runtime to resolve the actual method. + Location hidden_reg = instruction->GetLocations()->GetTemp(1); + DCHECK_EQ(T0, hidden_reg.AsRegister<XRegister>()); + __ Mv(hidden_reg.AsRegister<XRegister>(), temp); + } + // RA = temp->GetEntryPoint(); + __ Loadd(RA, temp, entry_point.Int32Value()); + + // RA(); + __ Jalr(RA); + DCHECK(!codegen_->IsLeafMethod()); + codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void LocationsBuilderRISCV64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) { + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. 
+ DCHECK(!instruction->IsStaticWithExplicitClinitCheck()); + + IntrinsicLocationsBuilderRISCV64 intrinsic(GetGraph()->GetAllocator(), codegen_); + if (intrinsic.TryDispatch(instruction)) { + return; + } + + if (instruction->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) { + CriticalNativeCallingConventionVisitorRiscv64 calling_convention_visitor( + /*for_register_allocation=*/ true); + CodeGenerator::CreateCommonInvokeLocationSummary(instruction, &calling_convention_visitor); + } else { + HandleInvoke(instruction); + } +} + +static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorRISCV64* codegen) { + if (invoke->GetLocations()->Intrinsified()) { + IntrinsicCodeGeneratorRISCV64 intrinsic(codegen); + intrinsic.Dispatch(invoke); + return true; + } + return false; +} + +void InstructionCodeGeneratorRISCV64::VisitInvokeStaticOrDirect( + HInvokeStaticOrDirect* instruction) { + // Explicit clinit checks triggered by static invokes must have been pruned by + // art::PrepareForRegisterAllocation. + DCHECK(!instruction->IsStaticWithExplicitClinitCheck()); + + if (TryGenerateIntrinsicCode(instruction, codegen_)) { + return; + } + + LocationSummary* locations = instruction->GetLocations(); + codegen_->GenerateStaticOrDirectCall( + instruction, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); +} + +void LocationsBuilderRISCV64::VisitInvokeVirtual(HInvokeVirtual* instruction) { + IntrinsicLocationsBuilderRISCV64 intrinsic(GetGraph()->GetAllocator(), codegen_); + if (intrinsic.TryDispatch(instruction)) { + return; + } + + HandleInvoke(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInvokeVirtual(HInvokeVirtual* instruction) { + if (TryGenerateIntrinsicCode(instruction, codegen_)) { + return; + } + + codegen_->GenerateVirtualCall(instruction, instruction->GetLocations()->GetTemp(0)); + DCHECK(!codegen_->IsLeafMethod()); +} + +void LocationsBuilderRISCV64::VisitInvokePolymorphic(HInvokePolymorphic* instruction) { + HandleInvoke(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInvokePolymorphic(HInvokePolymorphic* instruction) { + codegen_->GenerateInvokePolymorphicCall(instruction); +} + +void LocationsBuilderRISCV64::VisitInvokeCustom(HInvokeCustom* instruction) { + HandleInvoke(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitInvokeCustom(HInvokeCustom* instruction) { + codegen_->GenerateInvokeCustomCall(instruction); +} + +void LocationsBuilderRISCV64::VisitLessThan(HLessThan* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitLessThan(HLessThan* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitLessThanOrEqual(HLessThanOrEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitLessThanOrEqual(HLessThanOrEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitLoadClass(HLoadClass* instruction) { + HLoadClass::LoadKind load_kind = instruction->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + DCHECK_EQ(DataType::Type::kReference, instruction->GetType()); + DCHECK(loc.Equals(calling_convention.GetReturnLocation(DataType::Type::kReference))); + CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(instruction, loc, loc); + return; + } + DCHECK_EQ(instruction->NeedsAccessCheck(), + load_kind 
== HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage); + + const bool requires_read_barrier = gUseReadBarrier && !instruction->IsInBootImage(); + LocationSummary::CallKind call_kind = (instruction->NeedsEnvironment() || requires_read_barrier) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); + if (kUseBakerReadBarrier && requires_read_barrier && !instruction->NeedsEnvironment()) { + locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. + } + if (load_kind == HLoadClass::LoadKind::kReferrersClass) { + locations->SetInAt(0, Location::RequiresRegister()); + } + locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadClass::LoadKind::kBssEntry || + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the type resolution or initialization and marking to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); + } else { + // For non-Baker read barriers we have a temp-clobbering call. + } + } +} + +// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not +// move. +void InstructionCodeGeneratorRISCV64::VisitLoadClass(HLoadClass* instruction) + NO_THREAD_SAFETY_ANALYSIS { + HLoadClass::LoadKind load_kind = instruction->GetLoadKind(); + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { + codegen_->GenerateLoadClassRuntimeCall(instruction); + return; + } + DCHECK_EQ(instruction->NeedsAccessCheck(), + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage); + + LocationSummary* locations = instruction->GetLocations(); + Location out_loc = locations->Out(); + XRegister out = out_loc.AsRegister<XRegister>(); + const ReadBarrierOption read_barrier_option = + instruction->IsInBootImage() ? 
kWithoutReadBarrier : GetCompilerReadBarrierOption(); + bool generate_null_check = false; + switch (load_kind) { + case HLoadClass::LoadKind::kReferrersClass: { + DCHECK(!instruction->CanCallRuntime()); + DCHECK(!instruction->MustGenerateClinitCheck()); + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + XRegister current_method = locations->InAt(0).AsRegister<XRegister>(); + GenerateGcRootFieldLoad(instruction, + out_loc, + current_method, + ArtMethod::DeclaringClassOffset().Int32Value(), + read_barrier_option); + break; + } + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = + codegen_->NewBootImageTypePatch(instruction->GetDexFile(), instruction->GetTypeIndex()); + codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = + codegen_->NewBootImageTypePatch( + instruction->GetDexFile(), instruction->GetTypeIndex(), info_high); + codegen_->EmitPcRelativeAddiPlaceholder(info_low, out, out); + break; + } + case HLoadClass::LoadKind::kBootImageRelRo: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(instruction); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = + codegen_->NewBootImageRelRoPatch(boot_image_offset); + codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); + codegen_->EmitPcRelativeLwuPlaceholder(info_low, out, out); + break; + } + case HLoadClass::LoadKind::kBssEntry: + case HLoadClass::LoadKind::kBssEntryPublic: + case HLoadClass::LoadKind::kBssEntryPackage: { + CodeGeneratorRISCV64::PcRelativePatchInfo* bss_info_high = + codegen_->NewTypeBssEntryPatch(instruction); + codegen_->EmitPcRelativeAuipcPlaceholder(bss_info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewTypeBssEntryPatch( + instruction, bss_info_high); + GenerateGcRootFieldLoad(instruction, + out_loc, + out, + /* offset= */ kLinkTimeOffsetPlaceholderLow, + read_barrier_option, + &info_low->label); + generate_null_check = true; + break; + } + case HLoadClass::LoadKind::kJitBootImageAddress: { + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); + uint32_t address = reinterpret_cast32<uint32_t>(instruction->GetClass().Get()); + DCHECK_NE(address, 0u); + __ Loadwu(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } + case HLoadClass::LoadKind::kJitTableAddress: + __ Loadwu(out, codegen_->DeduplicateJitClassLiteral(instruction->GetDexFile(), + instruction->GetTypeIndex(), + instruction->GetClass())); + GenerateGcRootFieldLoad(instruction, out_loc, out, /* offset= */ 0, read_barrier_option); + break; + case HLoadClass::LoadKind::kRuntimeCall: + case HLoadClass::LoadKind::kInvalid: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + } + + if (generate_null_check || instruction->MustGenerateClinitCheck()) { + DCHECK(instruction->CanCallRuntime()); + SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathRISCV64(instruction, instruction); + codegen_->AddSlowPath(slow_path); + if (generate_null_check) { + __ Beqz(out, slow_path->GetEntryLabel()); + } + if (instruction->MustGenerateClinitCheck()) { + 
GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); + } + } +} + +void LocationsBuilderRISCV64::VisitLoadException(HLoadException* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitLoadException(HLoadException* instruction) { + XRegister out = instruction->GetLocations()->Out().AsRegister<XRegister>(); + __ Loadwu(out, TR, GetExceptionTlsOffset()); +} + +void LocationsBuilderRISCV64::VisitLoadMethodHandle(HLoadMethodHandle* instruction) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(instruction, loc, loc); +} + +void InstructionCodeGeneratorRISCV64::VisitLoadMethodHandle(HLoadMethodHandle* instruction) { + codegen_->GenerateLoadMethodHandleRuntimeCall(instruction); +} + +void LocationsBuilderRISCV64::VisitLoadMethodType(HLoadMethodType* instruction) { + InvokeRuntimeCallingConvention calling_convention; + Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); + CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(instruction, loc, loc); +} + +void InstructionCodeGeneratorRISCV64::VisitLoadMethodType(HLoadMethodType* instruction) { + codegen_->GenerateLoadMethodTypeRuntimeCall(instruction); +} + +void LocationsBuilderRISCV64::VisitLoadString(HLoadString* instruction) { + HLoadString::LoadKind load_kind = instruction->GetLoadKind(); + LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(instruction); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { + InvokeRuntimeCallingConvention calling_convention; + DCHECK_EQ(DataType::Type::kReference, instruction->GetType()); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); + } else { + locations->SetOut(Location::RequiresRegister()); + if (load_kind == HLoadString::LoadKind::kBssEntry) { + if (!gUseReadBarrier || kUseBakerReadBarrier) { + // Rely on the pResolveString and marking to save everything we need. + locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); + } else { + // For non-Baker read barriers we have a temp-clobbering call. + } + } + } +} + +// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not +// move. 
+void InstructionCodeGeneratorRISCV64::VisitLoadString(HLoadString* instruction) + NO_THREAD_SAFETY_ANALYSIS { + HLoadString::LoadKind load_kind = instruction->GetLoadKind(); + LocationSummary* locations = instruction->GetLocations(); + Location out_loc = locations->Out(); + XRegister out = out_loc.AsRegister<XRegister>(); + + switch (load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage() || + codegen_->GetCompilerOptions().IsBootImageExtension()); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = codegen_->NewBootImageStringPatch( + instruction->GetDexFile(), instruction->GetStringIndex()); + codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewBootImageStringPatch( + instruction->GetDexFile(), instruction->GetStringIndex(), info_high); + codegen_->EmitPcRelativeAddiPlaceholder(info_low, out, out); + return; + } + case HLoadString::LoadKind::kBootImageRelRo: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + uint32_t boot_image_offset = codegen_->GetBootImageOffset(instruction); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = + codegen_->NewBootImageRelRoPatch(boot_image_offset); + codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = + codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high); + codegen_->EmitPcRelativeLwuPlaceholder(info_low, out, out); + return; + } + case HLoadString::LoadKind::kBssEntry: { + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = codegen_->NewStringBssEntryPatch( + instruction->GetDexFile(), instruction->GetStringIndex()); + codegen_->EmitPcRelativeAuipcPlaceholder(info_high, out); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = codegen_->NewStringBssEntryPatch( + instruction->GetDexFile(), instruction->GetStringIndex(), info_high); + GenerateGcRootFieldLoad(instruction, + out_loc, + out, + /* offset= */ kLinkTimeOffsetPlaceholderLow, + GetCompilerReadBarrierOption(), + &info_low->label); + SlowPathCodeRISCV64* slow_path = + new (codegen_->GetScopedAllocator()) LoadStringSlowPathRISCV64(instruction); + codegen_->AddSlowPath(slow_path); + __ Beqz(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } + case HLoadString::LoadKind::kJitBootImageAddress: { + uint32_t address = reinterpret_cast32<uint32_t>(instruction->GetString().Get()); + DCHECK_NE(address, 0u); + __ Loadwu(out, codegen_->DeduplicateBootImageAddressLiteral(address)); + return; + } + case HLoadString::LoadKind::kJitTableAddress: + __ Loadwu( + out, + codegen_->DeduplicateJitStringLiteral( + instruction->GetDexFile(), instruction->GetStringIndex(), instruction->GetString())); + GenerateGcRootFieldLoad(instruction, out_loc, out, 0, GetCompilerReadBarrierOption()); + return; + default: + break; + } + + DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall); + InvokeRuntimeCallingConvention calling_convention; + DCHECK(calling_convention.GetReturnLocation(DataType::Type::kReference).Equals(out_loc)); + __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetStringIndex().index_); + codegen_->InvokeRuntime(kQuickResolveString, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); +} + +void LocationsBuilderRISCV64::VisitLongConstant(HLongConstant* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + 
locations->SetOut(Location::ConstantLocation(instruction)); +} + +void InstructionCodeGeneratorRISCV64::VisitLongConstant( + [[maybe_unused]] HLongConstant* instruction) { + // Will be generated at use site. +} + +void LocationsBuilderRISCV64::VisitMax(HMax* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitMax(HMax* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitMemoryBarrier(HMemoryBarrier* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitMemoryBarrier(HMemoryBarrier* instruction) { + codegen_->GenerateMemoryBarrier(instruction->GetBarrierKind()); +} + +void LocationsBuilderRISCV64::VisitMethodEntryHook(HMethodEntryHook* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitMethodEntryHook(HMethodEntryHook* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitMethodExitHook(HMethodExitHook* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitMethodExitHook(HMethodExitHook* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitMin(HMin* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitMin(HMin* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitMonitorOperation(HMonitorOperation* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void InstructionCodeGeneratorRISCV64::VisitMonitorOperation(HMonitorOperation* instruction) { + codegen_->InvokeRuntime(instruction->IsEnter() ? 
kQuickLockObject : kQuickUnlockObject, + instruction, + instruction->GetDexPc()); + if (instruction->IsEnter()) { + CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); + } else { + CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); + } +} + +void LocationsBuilderRISCV64::VisitMul(HMul* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected mul type " << instruction->GetResultType(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitMul(HMul* instruction) { + LocationSummary* locations = instruction->GetLocations(); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + __ Mulw(locations->Out().AsRegister<XRegister>(), + locations->InAt(0).AsRegister<XRegister>(), + locations->InAt(1).AsRegister<XRegister>()); + break; + + case DataType::Type::kInt64: + __ Mul(locations->Out().AsRegister<XRegister>(), + locations->InAt(0).AsRegister<XRegister>(), + locations->InAt(1).AsRegister<XRegister>()); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + FMul(locations->Out().AsFpuRegister<FRegister>(), + locations->InAt(0).AsFpuRegister<FRegister>(), + locations->InAt(1).AsFpuRegister<FRegister>(), + instruction->GetResultType()); + break; + + default: + LOG(FATAL) << "Unexpected mul type " << instruction->GetResultType(); + } +} + +void LocationsBuilderRISCV64::VisitNeg(HNeg* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << instruction->GetResultType(); + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitNeg(HNeg* instruction) { + LocationSummary* locations = instruction->GetLocations(); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + __ NegW(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); + break; + + case DataType::Type::kInt64: + __ Neg(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + FNeg(locations->Out().AsFpuRegister<FRegister>(), + locations->InAt(0).AsFpuRegister<FRegister>(), + instruction->GetResultType()); + break; + + default: + LOG(FATAL) << "Unexpected neg type " << instruction->GetResultType(); + UNREACHABLE(); + } +} + +void 
LocationsBuilderRISCV64::VisitNewArray(HNewArray* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConvention calling_convention; + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); +} + +void InstructionCodeGeneratorRISCV64::VisitNewArray(HNewArray* instruction) { + QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); + codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); + DCHECK(!codegen_->IsLeafMethod()); +} + +void LocationsBuilderRISCV64::VisitNewInstance(HNewInstance* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); +} + +void InstructionCodeGeneratorRISCV64::VisitNewInstance(HNewInstance* instruction) { + codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); +} + +void LocationsBuilderRISCV64::VisitNop(HNop* instruction) { + new (GetGraph()->GetAllocator()) LocationSummary(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitNop([[maybe_unused]] HNop* instruction) { + // The environment recording already happened in CodeGenerator::Compile. +} + +void LocationsBuilderRISCV64::VisitNot(HNot* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorRISCV64::VisitNot(HNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + switch (instruction->GetResultType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + __ Not(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); + break; + + default: + LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType(); + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::VisitNotEqual(HNotEqual* instruction) { + HandleCondition(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitNotEqual(HNotEqual* instruction) { + HandleCondition(instruction); +} + +void LocationsBuilderRISCV64::VisitNullConstant(HNullConstant* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetOut(Location::ConstantLocation(instruction)); +} + +void InstructionCodeGeneratorRISCV64::VisitNullConstant( + [[maybe_unused]] HNullConstant* instruction) { + // Will be generated at use site. 
+} + +void LocationsBuilderRISCV64::VisitNullCheck(HNullCheck* instruction) { + LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); + locations->SetInAt(0, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitNullCheck(HNullCheck* instruction) { + codegen_->GenerateNullCheck(instruction); +} + +void LocationsBuilderRISCV64::VisitOr(HOr* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitOr(HOr* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitPackedSwitch(HPackedSwitch* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + locations->SetInAt(0, Location::RequiresRegister()); +} + +void InstructionCodeGeneratorRISCV64::VisitPackedSwitch(HPackedSwitch* instruction) { + int32_t lower_bound = instruction->GetStartValue(); + uint32_t num_entries = instruction->GetNumEntries(); + LocationSummary* locations = instruction->GetLocations(); + XRegister value = locations->InAt(0).AsRegister<XRegister>(); + HBasicBlock* switch_block = instruction->GetBlock(); + HBasicBlock* default_block = instruction->GetDefaultBlock(); + + // Prepare a temporary register and an adjusted zero-based value. + ScratchRegisterScope srs(GetAssembler()); + XRegister temp = srs.AllocateXRegister(); + XRegister adjusted = value; + if (lower_bound != 0) { + adjusted = temp; + __ AddConst32(temp, value, -lower_bound); + } + + // Jump to the default block if the index is out of the packed switch value range. + // Note: We could save one instruction for `num_entries == 1` with BNEZ but the + // `HInstructionBuilder` transforms that case to an `HIf`, so let's keep the code simple. + CHECK_NE(num_entries, 0u); // `HInstructionBuilder` creates a `HGoto` for empty packed-switch. + { + ScratchRegisterScope srs2(GetAssembler()); + XRegister temp2 = srs2.AllocateXRegister(); + __ LoadConst32(temp2, num_entries); + __ Bgeu(adjusted, temp2, codegen_->GetLabelOf(default_block)); // Can clobber `TMP` if taken. + } + + if (num_entries >= kPackedSwitchCompareJumpThreshold) { + GenTableBasedPackedSwitch(adjusted, temp, num_entries, switch_block); + } else { + GenPackedSwitchWithCompares(adjusted, temp, num_entries, switch_block); + } +} + +void LocationsBuilderRISCV64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorRISCV64::VisitParallelMove(HParallelMove* instruction) { + if (instruction->GetNext()->IsSuspendCheck() && + instruction->GetBlock()->GetLoopInformation() != nullptr) { + HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); + // The back edge will generate the suspend check. 
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); + } + + codegen_->GetMoveResolver()->EmitNativeCode(instruction); +} + +void LocationsBuilderRISCV64::VisitParameterValue(HParameterValue* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); + if (location.IsStackSlot()) { + location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } else if (location.IsDoubleStackSlot()) { + location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); + } + locations->SetOut(location); +} + +void InstructionCodeGeneratorRISCV64::VisitParameterValue( + [[maybe_unused]] HParameterValue* instruction) { + // Nothing to do, the parameter is already at its location. +} + +void LocationsBuilderRISCV64::VisitPhi(HPhi* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { + locations->SetInAt(i, Location::Any()); + } + locations->SetOut(Location::Any()); +} + +void InstructionCodeGeneratorRISCV64::VisitPhi([[maybe_unused]] HPhi* instruction) { + LOG(FATAL) << "Unreachable"; +} + +void LocationsBuilderRISCV64::VisitRem(HRem* instruction) { + DataType::Type type = instruction->GetResultType(); + LocationSummary::CallKind call_kind = + DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly + : LocationSummary::kNoCall; + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); + + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); + locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); + locations->SetOut(calling_convention.GetReturnLocation(type)); + break; + } + + default: + LOG(FATAL) << "Unexpected rem type " << type; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorRISCV64::VisitRem(HRem* instruction) { + DataType::Type type = instruction->GetType(); + + switch (type) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + GenerateDivRemIntegral(instruction); + break; + + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + QuickEntrypointEnum entrypoint = + (type == DataType::Type::kFloat32) ? 
kQuickFmodf : kQuickFmod; + codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); + if (type == DataType::Type::kFloat32) { + CheckEntrypointTypes<kQuickFmodf, float, float, float>(); + } else { + CheckEntrypointTypes<kQuickFmod, double, double, double>(); + } + break; + } + default: + LOG(FATAL) << "Unexpected rem type " << type; + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::VisitReturn(HReturn* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DataType::Type return_type = instruction->InputAt(0)->GetType(); + DCHECK_NE(return_type, DataType::Type::kVoid); + locations->SetInAt(0, Riscv64ReturnLocation(return_type)); +} + +void InstructionCodeGeneratorRISCV64::VisitReturn(HReturn* instruction) { + if (GetGraph()->IsCompilingOsr()) { + // To simplify callers of an OSR method, we put a floating point return value + // in both floating point and core return registers. + switch (instruction->InputAt(0)->GetType()) { + case DataType::Type::kFloat32: + __ FMvXW(A0, FA0); + break; + case DataType::Type::kFloat64: + __ FMvXD(A0, FA0); + break; + default: + break; + } + } + codegen_->GenerateFrameExit(); +} + +void LocationsBuilderRISCV64::VisitReturnVoid(HReturnVoid* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitReturnVoid([[maybe_unused]] HReturnVoid* instruction) { + codegen_->GenerateFrameExit(); +} + +void LocationsBuilderRISCV64::VisitRor(HRor* instruction) { + HandleShift(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitRor(HRor* instruction) { + HandleShift(instruction); +} + +void LocationsBuilderRISCV64::VisitShl(HShl* instruction) { + HandleShift(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitShl(HShl* instruction) { + HandleShift(instruction); +} + +void LocationsBuilderRISCV64::VisitShr(HShr* instruction) { + HandleShift(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitShr(HShr* instruction) { + HandleShift(instruction); +} + +void LocationsBuilderRISCV64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitStaticFieldGet(HStaticFieldGet* instruction) { + HandleFieldGet(instruction, instruction->GetFieldInfo()); +} + +void LocationsBuilderRISCV64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitStaticFieldSet(HStaticFieldSet* instruction) { + HandleFieldSet(instruction, + instruction->GetFieldInfo(), + instruction->GetValueCanBeNull(), + instruction->GetWriteBarrierKind()); +} + +void LocationsBuilderRISCV64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(A0)); +} + +void InstructionCodeGeneratorRISCV64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { + __ LoadConst32(A0, instruction->GetFormat()->GetValue()); + codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc()); +} + +void LocationsBuilderRISCV64::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorRISCV64::VisitUnresolvedInstanceFieldGet( + HUnresolvedInstanceFieldGet* instruction) { + 
FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderRISCV64::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorRISCV64::VisitUnresolvedInstanceFieldSet( + HUnresolvedInstanceFieldSet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderRISCV64::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorRISCV64::VisitUnresolvedStaticFieldGet( + HUnresolvedStaticFieldGet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderRISCV64::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->CreateUnresolvedFieldLocationSummary( + instruction, instruction->GetFieldType(), calling_convention); +} + +void InstructionCodeGeneratorRISCV64::VisitUnresolvedStaticFieldSet( + HUnresolvedStaticFieldSet* instruction) { + FieldAccessCallingConventionRISCV64 calling_convention; + codegen_->GenerateUnresolvedFieldAccess(instruction, + instruction->GetFieldType(), + instruction->GetFieldIndex(), + instruction->GetDexPc(), + calling_convention); +} + +void LocationsBuilderRISCV64::VisitSelect(HSelect* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitSelect(HSelect* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitSub(HSub* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitSub(HSub* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitSuspendCheck(HSuspendCheck* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + // In suspend check slow path, usually there are no caller-save registers at all. + // If SIMD instructions are present, however, we force spilling all live SIMD + // registers in full width (since the runtime only saves/restores lower part). + locations->SetCustomSlowPathCallerSaves(GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : + RegisterSet::Empty()); +} + +void InstructionCodeGeneratorRISCV64::VisitSuspendCheck(HSuspendCheck* instruction) { + HBasicBlock* block = instruction->GetBlock(); + if (block->GetLoopInformation() != nullptr) { + DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); + // The back edge will generate the suspend check. 
+ return; + } + if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { + // The goto will generate the suspend check. + return; + } + GenerateSuspendCheck(instruction, nullptr); +} + +void LocationsBuilderRISCV64::VisitThrow(HThrow* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); +} + +void InstructionCodeGeneratorRISCV64::VisitThrow(HThrow* instruction) { + codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); +} + +void LocationsBuilderRISCV64::VisitTryBoundary(HTryBoundary* instruction) { + instruction->SetLocations(nullptr); +} + +void InstructionCodeGeneratorRISCV64::VisitTryBoundary(HTryBoundary* instruction) { + HBasicBlock* successor = instruction->GetNormalFlowSuccessor(); + if (!successor->IsExitBlock()) { + HandleGoto(instruction, successor); + } +} + +void LocationsBuilderRISCV64::VisitTypeConversion(HTypeConversion* instruction) { + DataType::Type input_type = instruction->GetInputType(); + DataType::Type result_type = instruction->GetResultType(); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; + + if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) || + (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) { + LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; + } + + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + if (DataType::IsFloatingPointType(input_type)) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + } + + if (DataType::IsFloatingPointType(result_type)) { + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } else { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + } +} + +void InstructionCodeGeneratorRISCV64::VisitTypeConversion(HTypeConversion* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DataType::Type result_type = instruction->GetResultType(); + DataType::Type input_type = instruction->GetInputType(); + + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; + + if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) { + XRegister dst = locations->Out().AsRegister<XRegister>(); + XRegister src = locations->InAt(0).AsRegister<XRegister>(); + switch (result_type) { + case DataType::Type::kUint8: + __ Andi(dst, src, 0xFF); + break; + case DataType::Type::kInt8: + __ SextB(dst, src); + break; + case DataType::Type::kUint16: + __ ZextH(dst, src); + break; + case DataType::Type::kInt16: + __ SextH(dst, src); + break; + case DataType::Type::kInt32: + case DataType::Type::kInt64: + // Sign-extend 32-bit int into bits 32 through 63 for int-to-long and long-to-int + // conversions, except when the input and output registers are the same and we are not + // converting longs to shorter types. In these cases, do nothing. 
+ if ((input_type == DataType::Type::kInt64) || (dst != src)) { + __ Addiw(dst, src, 0); + } + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + UNREACHABLE(); + } + } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) { + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + XRegister src = locations->InAt(0).AsRegister<XRegister>(); + if (input_type == DataType::Type::kInt64) { + if (result_type == DataType::Type::kFloat32) { + __ FCvtSL(dst, src, FPRoundingMode::kRNE); + } else { + __ FCvtDL(dst, src, FPRoundingMode::kRNE); + } + } else { + if (result_type == DataType::Type::kFloat32) { + __ FCvtSW(dst, src, FPRoundingMode::kRNE); + } else { + __ FCvtDW(dst, src); // No rounding. + } + } + } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) { + CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64); + XRegister dst = locations->Out().AsRegister<XRegister>(); + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + if (result_type == DataType::Type::kInt64) { + if (input_type == DataType::Type::kFloat32) { + __ FCvtLS(dst, src, FPRoundingMode::kRTZ); + } else { + __ FCvtLD(dst, src, FPRoundingMode::kRTZ); + } + } else { + if (input_type == DataType::Type::kFloat32) { + __ FCvtWS(dst, src, FPRoundingMode::kRTZ); + } else { + __ FCvtWD(dst, src, FPRoundingMode::kRTZ); + } + } + } else if (DataType::IsFloatingPointType(result_type) && + DataType::IsFloatingPointType(input_type)) { + FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); + if (result_type == DataType::Type::kFloat32) { + __ FCvtSD(dst, src); + } else { + __ FCvtDS(dst, src); + } + } else { + LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type + << " to " << result_type; + UNREACHABLE(); + } +} + +void LocationsBuilderRISCV64::VisitUShr(HUShr* instruction) { + HandleShift(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitUShr(HUShr* instruction) { + HandleShift(instruction); +} + +void LocationsBuilderRISCV64::VisitXor(HXor* instruction) { + HandleBinaryOp(instruction); +} + +void InstructionCodeGeneratorRISCV64::VisitXor(HXor* instruction) { + HandleBinaryOp(instruction); +} + +void LocationsBuilderRISCV64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecReduce(HVecReduce* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecReduce(HVecReduce* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecCnv(HVecCnv* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecCnv(HVecCnv* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void 
LocationsBuilderRISCV64::VisitVecNeg(HVecNeg* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecNeg(HVecNeg* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecAbs(HVecAbs* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecAbs(HVecAbs* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecNot(HVecNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecNot(HVecNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecAdd(HVecAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecAdd(HVecAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSub(HVecSub* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSub(HVecSub* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecMul(HVecMul* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecMul(HVecMul* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecDiv(HVecDiv* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecDiv(HVecDiv* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecMin(HVecMin* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecMin(HVecMin* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecMax(HVecMax* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecMax(HVecMax* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecAnd(HVecAnd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecAnd(HVecAnd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecAndNot(HVecAndNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecAndNot(HVecAndNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecOr(HVecOr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecOr(HVecOr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecXor(HVecXor* instruction) { + UNUSED(instruction); + 
LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecXor(HVecXor* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSaturationSub(HVecSaturationSub* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecShl(HVecShl* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecShl(HVecShl* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecShr(HVecShr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecShr(HVecShr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecUShr(HVecUShr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecUShr(HVecUShr* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSetScalars(HVecSetScalars* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSetScalars(HVecSetScalars* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecMultiplyAccumulate( + HVecMultiplyAccumulate* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecDotProd(HVecDotProd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecDotProd(HVecDotProd* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecLoad(HVecLoad* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecLoad(HVecLoad* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecStore(HVecStore* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecStore(HVecStore* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecPredSetAll(HVecPredSetAll* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecPredSetAll(HVecPredSetAll* instruction) { + 
UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecPredWhile(HVecPredWhile* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecPredWhile(HVecPredWhile* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecCondition(HVecCondition* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecCondition(HVecCondition* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void LocationsBuilderRISCV64::VisitVecPredNot(HVecPredNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +void InstructionCodeGeneratorRISCV64::VisitVecPredNot(HVecPredNot* instruction) { + UNUSED(instruction); + LOG(FATAL) << "Unimplemented"; +} + +namespace detail { + +// Mark which intrinsics we don't have handcrafted code for. +template <Intrinsics T> +struct IsUnimplemented { + bool is_unimplemented = false; +}; + +#define TRUE_OVERRIDE(Name) \ + template <> \ + struct IsUnimplemented<Intrinsics::k##Name> { \ + bool is_unimplemented = true; \ + }; +UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(TRUE_OVERRIDE) +#undef TRUE_OVERRIDE + +static constexpr bool kIsIntrinsicUnimplemented[] = { + false, // kNone +#define IS_UNIMPLEMENTED(Intrinsic, ...) \ + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) +#undef IS_UNIMPLEMENTED +}; + +} // namespace detail + +CodeGeneratorRISCV64::CodeGeneratorRISCV64(HGraph* graph, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats) + : CodeGenerator(graph, + kNumberOfXRegisters, + kNumberOfFRegisters, + /*number_of_register_pairs=*/ 0u, + ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves)), + ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)), + compiler_options, + stats, + ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), + assembler_(graph->GetAllocator(), + compiler_options.GetInstructionSetFeatures()->AsRiscv64InstructionSetFeatures()), + location_builder_(graph, this), + instruction_visitor_(graph, this), + block_labels_(nullptr), + move_resolver_(graph->GetAllocator(), this), + uint32_literals_(std::less<uint32_t>(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + uint64_literals_(std::less<uint64_t>(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + 
boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_string_patches_(StringReferenceValueComparator(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_class_patches_(TypeReferenceValueComparator(), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { + // Always mark the RA register to be saved. + AddAllocatedRegister(Location::RegisterLocation(RA)); +} + +void CodeGeneratorRISCV64::MaybeIncrementHotness(bool is_frame_entry) { + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + ScratchRegisterScope srs(GetAssembler()); + XRegister method = is_frame_entry ? kArtMethodRegister : srs.AllocateXRegister(); + if (!is_frame_entry) { + __ Loadd(method, SP, 0); + } + XRegister counter = srs.AllocateXRegister(); + __ Loadhu(counter, method, ArtMethod::HotnessCountOffset().Int32Value()); + Riscv64Label done; + DCHECK_EQ(0u, interpreter::kNterpHotnessValue); + __ Beqz(counter, &done); // Can clobber `TMP` if taken. + __ Addi(counter, counter, -1); + // We may not have another scratch register available for `Storeh`()`, + // so we must use the `Sh()` function directly. + static_assert(IsInt<12>(ArtMethod::HotnessCountOffset().Int32Value())); + __ Sh(counter, method, ArtMethod::HotnessCountOffset().Int32Value()); + __ Bind(&done); + } + + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathRISCV64(); + AddSlowPath(slow_path); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + DCHECK(!HasEmptyFrame()); + uint64_t address = reinterpret_cast64<uint64_t>(info); + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ LoadConst64(tmp, address); + XRegister counter = srs.AllocateXRegister(); + __ Loadhu(counter, tmp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()); + __ Beqz(counter, slow_path->GetEntryLabel()); // Can clobber `TMP` if taken. + __ Addi(counter, counter, -1); + // We do not have another scratch register available for `Storeh`()`, + // so we must use the `Sh()` function directly. + static_assert(IsInt<12>(ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); + __ Sh(counter, tmp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()); + __ Bind(slow_path->GetExitLabel()); + } +} + +bool CodeGeneratorRISCV64::CanUseImplicitSuspendCheck() const { + // TODO(riscv64): Implement implicit suspend checks to reduce code size. + return false; +} + +void CodeGeneratorRISCV64::GenerateMemoryBarrier(MemBarrierKind kind) { + switch (kind) { + case MemBarrierKind::kAnyAny: + __ Fence(/*pred=*/ kFenceRead | kFenceWrite, /*succ=*/ kFenceRead | kFenceWrite); + break; + case MemBarrierKind::kAnyStore: + __ Fence(/*pred=*/ kFenceRead | kFenceWrite, /*succ=*/ kFenceWrite); + break; + case MemBarrierKind::kLoadAny: + __ Fence(/*pred=*/ kFenceRead, /*succ=*/ kFenceRead | kFenceWrite); + break; + case MemBarrierKind::kStoreStore: + __ Fence(/*pred=*/ kFenceWrite, /*succ=*/ kFenceWrite); + break; + + default: + LOG(FATAL) << "Unexpected memory barrier " << kind; + UNREACHABLE(); + } +} + +void CodeGeneratorRISCV64::GenerateFrameEntry() { + // Check if we need to generate the clinit check. We will jump to the + // resolution stub if the class is not initialized and the executing thread is + // not the thread initializing it. 
+ // We do this before constructing the frame to get the correct stack trace if + // an exception is thrown. + if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) { + Riscv64Label resolution; + Riscv64Label memory_barrier; + + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + + // We don't emit a read barrier here to save on code size. We rely on the + // resolution trampoline to do a clinit check before re-entering this code. + __ Loadwu(tmp2, kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()); + + // We shall load the full 32-bit status word with sign-extension and compare as unsigned + // to sign-extended shifted status values. This yields the same comparison as loading and + // materializing unsigned but the constant is materialized with a single LUI instruction. + __ Loadw(tmp, tmp2, mirror::Class::StatusOffset().SizeValue()); // Sign-extended. + + // Check if we're visibly initialized. + __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kVisiblyInitialized>()); + __ Bgeu(tmp, tmp2, &frame_entry_label_); // Can clobber `TMP` if taken. + + // Check if we're initialized and jump to code that does a memory barrier if so. + __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kInitialized>()); + __ Bgeu(tmp, tmp2, &memory_barrier); // Can clobber `TMP` if taken. + + // Check if we're initializing and the thread initializing is the one + // executing the code. + __ Li(tmp2, ShiftedSignExtendedClassStatusValue<ClassStatus::kInitializing>()); + __ Bltu(tmp, tmp2, &resolution); // Can clobber `TMP` if taken. + + __ Loadwu(tmp2, kArtMethodRegister, ArtMethod::DeclaringClassOffset().Int32Value()); + __ Loadw(tmp, tmp2, mirror::Class::ClinitThreadIdOffset().Int32Value()); + __ Loadw(tmp2, TR, Thread::TidOffset<kRiscv64PointerSize>().Int32Value()); + __ Beq(tmp, tmp2, &frame_entry_label_); + __ Bind(&resolution); + + // Jump to the resolution stub. + ThreadOffset64 entrypoint_offset = + GetThreadOffset<kRiscv64PointerSize>(kQuickQuickResolutionTrampoline); + __ Loadd(tmp, TR, entrypoint_offset.Int32Value()); + __ Jr(tmp); + + __ Bind(&memory_barrier); + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } + __ Bind(&frame_entry_label_); + + bool do_overflow_check = + FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kRiscv64) || !IsLeafMethod(); + + if (do_overflow_check) { + DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + __ Loadw( + Zero, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kRiscv64))); + RecordPcInfo(nullptr, 0); + } + + if (!HasEmptyFrame()) { + // Make sure the frame size isn't unreasonably large. + if (GetFrameSize() > GetStackOverflowReservedBytes(InstructionSet::kRiscv64)) { + LOG(FATAL) << "Stack frame larger than " + << GetStackOverflowReservedBytes(InstructionSet::kRiscv64) << " bytes"; + } + + // Spill callee-saved registers. 
+ + uint32_t frame_size = GetFrameSize(); + + IncreaseFrame(frame_size); + + uint32_t offset = frame_size; + for (size_t i = arraysize(kCoreCalleeSaves); i != 0; ) { + --i; + XRegister reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + offset -= kRiscv64DoublewordSize; + __ Stored(reg, SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Core(reg), offset); + } + } + + for (size_t i = arraysize(kFpuCalleeSaves); i != 0; ) { + --i; + FRegister reg = kFpuCalleeSaves[i]; + if (allocated_registers_.ContainsFloatingPointRegister(reg)) { + offset -= kRiscv64DoublewordSize; + __ FStored(reg, SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Fp(reg), offset); + } + } + + // Save the current method if we need it. Note that we do not + // do this in HCurrentMethod, as the instruction might have been removed + // in the SSA graph. + if (RequiresCurrentMethod()) { + __ Stored(kArtMethodRegister, SP, 0); + } + + if (GetGraph()->HasShouldDeoptimizeFlag()) { + // Initialize should_deoptimize flag to 0. + __ Storew(Zero, SP, GetStackOffsetOfShouldDeoptimizeFlag()); + } + } + MaybeIncrementHotness(/*is_frame_entry=*/ true); +} + +void CodeGeneratorRISCV64::GenerateFrameExit() { + __ cfi().RememberState(); + + if (!HasEmptyFrame()) { + // Restore callee-saved registers. + + // For better instruction scheduling restore RA before other registers. + uint32_t offset = GetFrameSize(); + for (size_t i = arraysize(kCoreCalleeSaves); i != 0; ) { + --i; + XRegister reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + offset -= kRiscv64DoublewordSize; + __ Loadd(reg, SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Core(reg)); + } + } + + for (size_t i = arraysize(kFpuCalleeSaves); i != 0; ) { + --i; + FRegister reg = kFpuCalleeSaves[i]; + if (allocated_registers_.ContainsFloatingPointRegister(reg)) { + offset -= kRiscv64DoublewordSize; + __ FLoadd(reg, SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Fp(reg)); + } + } + + DecreaseFrame(GetFrameSize()); + } + + __ Jr(RA); + + __ cfi().RestoreState(); + __ cfi().DefCFAOffset(GetFrameSize()); +} + +void CodeGeneratorRISCV64::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } + +void CodeGeneratorRISCV64::MoveConstant(Location destination, int32_t value) { + DCHECK(destination.IsRegister()); + __ LoadConst32(destination.AsRegister<XRegister>(), value); +} + +void CodeGeneratorRISCV64::MoveLocation(Location destination, + Location source, + DataType::Type dst_type) { + if (source.Equals(destination)) { + return; + } + + // A valid move type can always be inferred from the destination and source locations. + // When moving from and to a register, the `dst_type` can be used to generate 32-bit instead + // of 64-bit moves but it's generally OK to use 64-bit moves for 32-bit values in registers. + bool unspecified_type = (dst_type == DataType::Type::kVoid); + // TODO(riscv64): Is the destination type known in all cases? + // TODO(riscv64): Can unspecified `dst_type` move 32-bit GPR to FPR without NaN-boxing? + CHECK(!unspecified_type); + + if (destination.IsRegister() || destination.IsFpuRegister()) { + if (unspecified_type) { + HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr; + if (source.IsStackSlot() || + (src_cst != nullptr && + (src_cst->IsIntConstant() || src_cst->IsFloatConstant() || src_cst->IsNullConstant()))) { + // For stack slots and 32-bit constants, a 32-bit type is appropriate. + dst_type = destination.IsRegister() ? 
DataType::Type::kInt32 : DataType::Type::kFloat32; + } else { + // If the source is a double stack slot or a 64-bit constant, a 64-bit type + // is appropriate. Else the source is a register, and since the type has not + // been specified, we chose a 64-bit type to force a 64-bit move. + dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64; + } + } + DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) || + (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type))); + + if (source.IsStackSlot() || source.IsDoubleStackSlot()) { + // Move to GPR/FPR from stack + if (DataType::IsFloatingPointType(dst_type)) { + if (DataType::Is64BitType(dst_type)) { + __ FLoadd(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex()); + } else { + __ FLoadw(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex()); + } + } else { + if (DataType::Is64BitType(dst_type)) { + __ Loadd(destination.AsRegister<XRegister>(), SP, source.GetStackIndex()); + } else if (dst_type == DataType::Type::kReference) { + __ Loadwu(destination.AsRegister<XRegister>(), SP, source.GetStackIndex()); + } else { + __ Loadw(destination.AsRegister<XRegister>(), SP, source.GetStackIndex()); + } + } + } else if (source.IsConstant()) { + // Move to GPR/FPR from constant + // TODO(riscv64): Consider using literals for difficult-to-materialize 64-bit constants. + int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant()); + ScratchRegisterScope srs(GetAssembler()); + XRegister gpr = DataType::IsFloatingPointType(dst_type) + ? srs.AllocateXRegister() + : destination.AsRegister<XRegister>(); + if (DataType::IsFloatingPointType(dst_type) && value == 0) { + gpr = Zero; // Note: The scratch register allocated above shall not be used. + } else { + // Note: For `float` we load the sign-extended value here as it can sometimes yield + // a shorter instruction sequence. The higher 32 bits shall be ignored during the + // transfer to FP reg and the result shall be correctly NaN-boxed. 
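        // (On RV64, FMvWX transfers only the low 32 bits and NaN-boxes the upper half of the
        // FP register, so the sign-extended high bits of the GPR are harmless here.)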
+ __ LoadConst64(gpr, value); + } + if (dst_type == DataType::Type::kFloat32) { + __ FMvWX(destination.AsFpuRegister<FRegister>(), gpr); + } else if (dst_type == DataType::Type::kFloat64) { + __ FMvDX(destination.AsFpuRegister<FRegister>(), gpr); + } + } else if (source.IsRegister()) { + if (destination.IsRegister()) { + // Move to GPR from GPR + __ Mv(destination.AsRegister<XRegister>(), source.AsRegister<XRegister>()); + } else { + DCHECK(destination.IsFpuRegister()); + if (DataType::Is64BitType(dst_type)) { + __ FMvDX(destination.AsFpuRegister<FRegister>(), source.AsRegister<XRegister>()); + } else { + __ FMvWX(destination.AsFpuRegister<FRegister>(), source.AsRegister<XRegister>()); + } + } + } else if (source.IsFpuRegister()) { + if (destination.IsFpuRegister()) { + if (GetGraph()->HasSIMD()) { + LOG(FATAL) << "Vector extension is unsupported"; + UNREACHABLE(); + } else { + // Move to FPR from FPR + if (dst_type == DataType::Type::kFloat32) { + __ FMvS(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>()); + } else { + DCHECK_EQ(dst_type, DataType::Type::kFloat64); + __ FMvD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>()); + } + } + } else { + DCHECK(destination.IsRegister()); + if (DataType::Is64BitType(dst_type)) { + __ FMvXD(destination.AsRegister<XRegister>(), source.AsFpuRegister<FRegister>()); + } else { + __ FMvXW(destination.AsRegister<XRegister>(), source.AsFpuRegister<FRegister>()); + } + } + } + } else if (destination.IsSIMDStackSlot()) { + LOG(FATAL) << "SIMD is unsupported"; + UNREACHABLE(); + } else { // The destination is not a register. It must be a stack slot. + DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot()); + if (source.IsRegister() || source.IsFpuRegister()) { + if (unspecified_type) { + if (source.IsRegister()) { + dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64; + } else { + dst_type = + destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64; + } + } + DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) && + (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type))); + // Move to stack from GPR/FPR + if (DataType::Is64BitType(dst_type)) { + if (source.IsRegister()) { + __ Stored(source.AsRegister<XRegister>(), SP, destination.GetStackIndex()); + } else { + __ FStored(source.AsFpuRegister<FRegister>(), SP, destination.GetStackIndex()); + } + } else { + if (source.IsRegister()) { + __ Storew(source.AsRegister<XRegister>(), SP, destination.GetStackIndex()); + } else { + __ FStorew(source.AsFpuRegister<FRegister>(), SP, destination.GetStackIndex()); + } + } + } else if (source.IsConstant()) { + // Move to stack from constant + int64_t value = GetInt64ValueOf(source.GetConstant()); + ScratchRegisterScope srs(GetAssembler()); + XRegister gpr = (value != 0) ? 
srs.AllocateXRegister() : Zero; + if (value != 0) { + __ LoadConst64(gpr, value); + } + if (destination.IsStackSlot()) { + __ Storew(gpr, SP, destination.GetStackIndex()); + } else { + DCHECK(destination.IsDoubleStackSlot()); + __ Stored(gpr, SP, destination.GetStackIndex()); + } + } else { + DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); + DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot()); + // Move to stack from stack + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + if (destination.IsStackSlot()) { + __ Loadw(tmp, SP, source.GetStackIndex()); + __ Storew(tmp, SP, destination.GetStackIndex()); + } else { + __ Loadd(tmp, SP, source.GetStackIndex()); + __ Stored(tmp, SP, destination.GetStackIndex()); + } + } + } +} + +void CodeGeneratorRISCV64::AddLocationAsTemp(Location location, LocationSummary* locations) { + if (location.IsRegister()) { + locations->AddTemp(location); + } else { + UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; + } +} + +void CodeGeneratorRISCV64::SetupBlockedRegisters() const { + // ZERO, GP, SP, RA, TP and TR(S1) are reserved and can't be allocated. + blocked_core_registers_[Zero] = true; + blocked_core_registers_[GP] = true; + blocked_core_registers_[SP] = true; + blocked_core_registers_[RA] = true; + blocked_core_registers_[TP] = true; + blocked_core_registers_[TR] = true; // ART Thread register. + + // TMP(T6), TMP2(T5) and FTMP(FT11) are used as temporary/scratch registers. + blocked_core_registers_[TMP] = true; + blocked_core_registers_[TMP2] = true; + blocked_fpu_registers_[FTMP] = true; + + if (GetGraph()->IsDebuggable()) { + // Stubs do not save callee-save floating point registers. If the graph + // is debuggable, we need to deal with these registers differently. For + // now, just block them. + for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { + blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; + } + } +} + +size_t CodeGeneratorRISCV64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { + __ Stored(XRegister(reg_id), SP, stack_index); + return kRiscv64DoublewordSize; +} + +size_t CodeGeneratorRISCV64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { + __ Loadd(XRegister(reg_id), SP, stack_index); + return kRiscv64DoublewordSize; +} + +size_t CodeGeneratorRISCV64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + if (GetGraph()->HasSIMD()) { + // TODO(riscv64): RISC-V vector extension. + UNIMPLEMENTED(FATAL) << "Vector extension is unsupported"; + UNREACHABLE(); + } + __ FStored(FRegister(reg_id), SP, stack_index); + return kRiscv64FloatRegSizeInBytes; +} + +size_t CodeGeneratorRISCV64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) { + if (GetGraph()->HasSIMD()) { + // TODO(riscv64): RISC-V vector extension. + UNIMPLEMENTED(FATAL) << "Vector extension is unsupported"; + UNREACHABLE(); + } + __ FLoadd(FRegister(reg_id), SP, stack_index); + return kRiscv64FloatRegSizeInBytes; +} + +void CodeGeneratorRISCV64::DumpCoreRegister(std::ostream& stream, int reg) const { + stream << XRegister(reg); +} + +void CodeGeneratorRISCV64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { + stream << FRegister(reg); +} + +void CodeGeneratorRISCV64::Finalize() { + // Ensure that we fix up branches and literal loads and emit the literal pool. + __ FinalizeCode(); + + // Adjust native pc offsets in stack maps. 
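  // FinalizeCode() may have replaced out-of-range short branches with longer sequences,
  // shifting the instructions that follow them, so each recorded native PC is remapped
  // through GetAdjustedPosition().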
+ StackMapStream* stack_map_stream = GetStackMapStream(); + for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) { + uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i); + uint32_t new_position = __ GetAdjustedPosition(old_position); + DCHECK_GE(new_position, old_position); + stack_map_stream->SetStackMapNativePcOffset(i, new_position); + } + + // Adjust pc offsets for the disassembly information. + if (disasm_info_ != nullptr) { + GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval(); + frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start); + frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end); + for (auto& entry : *disasm_info_->GetInstructionIntervals()) { + entry.second.start = __ GetAdjustedPosition(entry.second.start); + entry.second.end = __ GetAdjustedPosition(entry.second.end); + } + for (auto& entry : *disasm_info_->GetSlowPathIntervals()) { + entry.code_interval.start = __ GetAdjustedPosition(entry.code_interval.start); + entry.code_interval.end = __ GetAdjustedPosition(entry.code_interval.end); + } + } +} + +// Generate code to invoke a runtime entry point. +void CodeGeneratorRISCV64::InvokeRuntime(QuickEntrypointEnum entrypoint, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path) { + ValidateInvokeRuntime(entrypoint, instruction, slow_path); + + ThreadOffset64 entrypoint_offset = GetThreadOffset<kRiscv64PointerSize>(entrypoint); + + // TODO(riscv64): Reduce code size for AOT by using shared trampolines for slow path + // runtime calls across the entire oat file. + __ Loadd(RA, TR, entrypoint_offset.Int32Value()); + __ Jalr(RA); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } +} + +// Generate code to invoke a runtime entry point, but do not record +// PC-related information in a stack map. 
+void CodeGeneratorRISCV64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path) { + ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + __ Loadd(RA, TR, entry_point_offset); + __ Jalr(RA); +} + +void CodeGeneratorRISCV64::IncreaseFrame(size_t adjustment) { + int32_t adjustment32 = dchecked_integral_cast<int32_t>(adjustment); + __ AddConst64(SP, SP, -adjustment32); + GetAssembler()->cfi().AdjustCFAOffset(adjustment32); +} + +void CodeGeneratorRISCV64::DecreaseFrame(size_t adjustment) { + int32_t adjustment32 = dchecked_integral_cast<int32_t>(adjustment); + __ AddConst64(SP, SP, adjustment32); + GetAssembler()->cfi().AdjustCFAOffset(-adjustment32); +} + +void CodeGeneratorRISCV64::GenerateNop() { + __ Nop(); +} + +void CodeGeneratorRISCV64::GenerateImplicitNullCheck(HNullCheck* instruction) { + if (CanMoveNullCheckToUser(instruction)) { + return; + } + Location obj = instruction->GetLocations()->InAt(0); + + __ Lw(Zero, obj.AsRegister<XRegister>(), 0); + RecordPcInfo(instruction, instruction->GetDexPc()); +} + +void CodeGeneratorRISCV64::GenerateExplicitNullCheck(HNullCheck* instruction) { + SlowPathCodeRISCV64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathRISCV64(instruction); + AddSlowPath(slow_path); + + Location obj = instruction->GetLocations()->InAt(0); + + __ Beqz(obj.AsRegister<XRegister>(), slow_path->GetEntryLabel()); +} + +HLoadString::LoadKind CodeGeneratorRISCV64::GetSupportedLoadStringKind( + HLoadString::LoadKind desired_string_load_kind) { + switch (desired_string_load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageRelRo: + case HLoadString::LoadKind::kBssEntry: + DCHECK(!Runtime::Current()->UseJitCompilation()); + break; + case HLoadString::LoadKind::kJitBootImageAddress: + case HLoadString::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + break; + case HLoadString::LoadKind::kRuntimeCall: + break; + } + return desired_string_load_kind; +} + +HLoadClass::LoadKind CodeGeneratorRISCV64::GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) { + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kInvalid: + LOG(FATAL) << "UNREACHABLE"; + UNREACHABLE(); + case HLoadClass::LoadKind::kReferrersClass: + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageRelRo: + case HLoadClass::LoadKind::kBssEntry: + case HLoadClass::LoadKind::kBssEntryPublic: + case HLoadClass::LoadKind::kBssEntryPackage: + DCHECK(!Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kJitBootImageAddress: + case HLoadClass::LoadKind::kJitTableAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + break; + case HLoadClass::LoadKind::kRuntimeCall: + break; + } + return desired_class_load_kind; +} + +HInvokeStaticOrDirect::DispatchInfo CodeGeneratorRISCV64::GetSupportedInvokeStaticOrDirectDispatch( + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, ArtMethod* method) { + UNUSED(method); + // On RISCV64 we support all dispatch types. 
+ return desired_dispatch_info; +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageIntrinsicPatch( + uint32_t intrinsic_data, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, intrinsic_data, info_high, &boot_image_other_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageRelRoPatch( + uint32_t boot_image_offset, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + /* dex_file= */ nullptr, boot_image_offset, info_high, &boot_image_other_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageMethodPatch( + MethodReference target_method, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &boot_image_method_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewMethodBssEntryPatch( + MethodReference target_method, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &method_bss_entry_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageTypePatch( + const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &boot_image_type_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageJniEntrypointPatch( + MethodReference target_method, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &boot_image_jni_entrypoint_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewTypeBssEntryPatch( + HLoadClass* load_class, + const PcRelativePatchInfo* info_high) { + const DexFile& dex_file = load_class->GetDexFile(); + dex::TypeIndex type_index = load_class->GetTypeIndex(); + ArenaDeque<PcRelativePatchInfo>* patches = nullptr; + switch (load_class->GetLoadKind()) { + case HLoadClass::LoadKind::kBssEntry: + patches = &type_bss_entry_patches_; + break; + case HLoadClass::LoadKind::kBssEntryPublic: + patches = &public_type_bss_entry_patches_; + break; + case HLoadClass::LoadKind::kBssEntryPackage: + patches = &package_type_bss_entry_patches_; + break; + default: + LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind(); + UNREACHABLE(); + } + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, patches); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewBootImageStringPatch( + const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &boot_image_string_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewStringBssEntryPatch( + const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &string_bss_entry_patches_); +} + +CodeGeneratorRISCV64::PcRelativePatchInfo* CodeGeneratorRISCV64::NewPcRelativePatch( + const DexFile* dex_file, + uint32_t offset_or_index, + const PcRelativePatchInfo* info_high, + ArenaDeque<PcRelativePatchInfo>* patches) { + patches->emplace_back(dex_file, offset_or_index, info_high); + return &patches->back(); +} + +Literal* 
CodeGeneratorRISCV64::DeduplicateUint32Literal(uint32_t value) { + return uint32_literals_.GetOrCreate(value, + [this, value]() { return __ NewLiteral<uint32_t>(value); }); +} + +Literal* CodeGeneratorRISCV64::DeduplicateUint64Literal(uint64_t value) { + return uint64_literals_.GetOrCreate(value, + [this, value]() { return __ NewLiteral<uint64_t>(value); }); +} + +Literal* CodeGeneratorRISCV64::DeduplicateBootImageAddressLiteral(uint64_t address) { + return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); +} + +Literal* CodeGeneratorRISCV64::DeduplicateJitStringLiteral(const DexFile& dex_file, + dex::StringIndex string_index, + Handle<mirror::String> handle) { + ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); + return jit_string_patches_.GetOrCreate( + StringReference(&dex_file, string_index), + [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); }); +} + +Literal* CodeGeneratorRISCV64::DeduplicateJitClassLiteral(const DexFile& dex_file, + dex::TypeIndex type_index, + Handle<mirror::Class> handle) { + ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); + return jit_class_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); }); +} + +void CodeGeneratorRISCV64::PatchJitRootUse(uint8_t* code, + const uint8_t* roots_data, + const Literal* literal, + uint64_t index_in_table) const { + uint32_t literal_offset = GetAssembler().GetLabelLocation(literal->GetLabel()); + uintptr_t address = + reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); + reinterpret_cast<uint32_t*>(code + literal_offset)[0] = dchecked_integral_cast<uint32_t>(address); +} + +void CodeGeneratorRISCV64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { + for (const auto& entry : jit_string_patches_) { + const StringReference& string_reference = entry.first; + Literal* table_entry_literal = entry.second; + uint64_t index_in_table = GetJitStringRootIndex(string_reference); + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); + } + for (const auto& entry : jit_class_patches_) { + const TypeReference& type_reference = entry.first; + Literal* table_entry_literal = entry.second; + uint64_t index_in_table = GetJitClassRootIndex(type_reference); + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); + } +} + +void CodeGeneratorRISCV64::EmitPcRelativeAuipcPlaceholder(PcRelativePatchInfo* info_high, + XRegister out) { + DCHECK(info_high->pc_insn_label == &info_high->label); + __ Bind(&info_high->label); + __ Auipc(out, /*imm20=*/ kLinkTimeOffsetPlaceholderHigh); +} + +void CodeGeneratorRISCV64::EmitPcRelativeAddiPlaceholder(PcRelativePatchInfo* info_low, + XRegister rd, + XRegister rs1) { + DCHECK(info_low->pc_insn_label != &info_low->label); + __ Bind(&info_low->label); + __ Addi(rd, rs1, /*imm12=*/ kLinkTimeOffsetPlaceholderLow); +} + +void CodeGeneratorRISCV64::EmitPcRelativeLwuPlaceholder(PcRelativePatchInfo* info_low, + XRegister rd, + XRegister rs1) { + DCHECK(info_low->pc_insn_label != &info_low->label); + __ Bind(&info_low->label); + __ Lwu(rd, rs1, /*offset=*/ kLinkTimeOffsetPlaceholderLow); +} + +void CodeGeneratorRISCV64::EmitPcRelativeLdPlaceholder(PcRelativePatchInfo* info_low, + XRegister rd, + XRegister rs1) { + DCHECK(info_low->pc_insn_label != &info_low->label); + __ Bind(&info_low->label); + __ Ld(rd, rs1, /*offset=*/ kLinkTimeOffsetPlaceholderLow); +} + +template <linker::LinkerPatch 
(*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorRISCV64::EmitPcRelativeLinkerPatches( + const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<linker::LinkerPatch>* linker_patches) { + for (const PcRelativePatchInfo& info : infos) { + linker_patches->push_back(Factory(__ GetLabelLocation(&info.label), + info.target_dex_file, + __ GetLabelLocation(info.pc_insn_label), + info.offset_or_index)); + } +} + +template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> +linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, + const DexFile* target_dex_file, + uint32_t pc_insn_offset, + uint32_t boot_image_offset) { + DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. + return Factory(literal_offset, pc_insn_offset, boot_image_offset); +} + +void CodeGeneratorRISCV64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { + DCHECK(linker_patches->empty()); + size_t size = + boot_image_method_patches_.size() + + method_bss_entry_patches_.size() + + boot_image_type_patches_.size() + + type_bss_entry_patches_.size() + + public_type_bss_entry_patches_.size() + + package_type_bss_entry_patches_.size() + + boot_image_string_patches_.size() + + string_bss_entry_patches_.size() + + boot_image_jni_entrypoint_patches_.size() + + boot_image_other_patches_.size(); + linker_patches->reserve(size); + if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) { + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( + boot_image_method_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( + boot_image_string_patches_, linker_patches); + } else { + DCHECK(boot_image_method_patches_.empty()); + DCHECK(boot_image_type_patches_.empty()); + DCHECK(boot_image_string_patches_.empty()); + } + if (GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( + boot_image_other_patches_, linker_patches); + } else { + EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( + boot_image_other_patches_, linker_patches); + } + EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( + method_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( + type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>( + public_type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>( + package_type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( + string_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>( + boot_image_jni_entrypoint_patches_, linker_patches); + DCHECK_EQ(size, linker_patches->size()); +} + +void CodeGeneratorRISCV64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) { + switch (load_kind) { + case MethodLoadKind::kBootImageLinkTimePcRelative: { + DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = + NewBootImageMethodPatch(invoke->GetResolvedMethodReference()); + 
EmitPcRelativeAuipcPlaceholder(info_high, temp.AsRegister<XRegister>()); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = + NewBootImageMethodPatch(invoke->GetResolvedMethodReference(), info_high); + EmitPcRelativeAddiPlaceholder( + info_low, temp.AsRegister<XRegister>(), temp.AsRegister<XRegister>()); + break; + } + case MethodLoadKind::kBootImageRelRo: { + uint32_t boot_image_offset = GetBootImageOffset(invoke); + PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset); + EmitPcRelativeAuipcPlaceholder(info_high, temp.AsRegister<XRegister>()); + PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high); + // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. + EmitPcRelativeLwuPlaceholder( + info_low, temp.AsRegister<XRegister>(), temp.AsRegister<XRegister>()); + break; + } + case MethodLoadKind::kBssEntry: { + PcRelativePatchInfo* info_high = NewMethodBssEntryPatch(invoke->GetMethodReference()); + EmitPcRelativeAuipcPlaceholder(info_high, temp.AsRegister<XRegister>()); + PcRelativePatchInfo* info_low = + NewMethodBssEntryPatch(invoke->GetMethodReference(), info_high); + EmitPcRelativeLdPlaceholder( + info_low, temp.AsRegister<XRegister>(), temp.AsRegister<XRegister>()); + break; + } + case MethodLoadKind::kJitDirectAddress: { + __ LoadConst64(temp.AsRegister<XRegister>(), + reinterpret_cast<uint64_t>(invoke->GetResolvedMethod())); + break; + } + case MethodLoadKind::kRuntimeCall: { + // Test situation, don't do anything. + break; + } + default: { + LOG(FATAL) << "Load kind should have already been handled " << load_kind; + UNREACHABLE(); + } + } +} + +void CodeGeneratorRISCV64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, + Location temp, + SlowPathCode* slow_path) { + // All registers are assumed to be correctly set up per the calling convention. + Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. + + switch (invoke->GetMethodLoadKind()) { + case MethodLoadKind::kStringInit: { + // temp = thread->string_init_entrypoint + uint32_t offset = + GetThreadOffset<kRiscv64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); + __ Loadd(temp.AsRegister<XRegister>(), TR, offset); + break; + } + case MethodLoadKind::kRecursive: + callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex()); + break; + case MethodLoadKind::kRuntimeCall: + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. + case MethodLoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); + if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) { + // Do not materialize the method pointer, load directly the entrypoint. 
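        // The entrypoint is loaded into RA via a patched auipc + ld pair below, so the
        // ArtMethod* itself is never materialized for this load kind; the kCallCriticalNative
        // case further down then finds the entrypoint already in RA.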
+ CodeGeneratorRISCV64::PcRelativePatchInfo* info_high = + NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference()); + EmitPcRelativeAuipcPlaceholder(info_high, RA); + CodeGeneratorRISCV64::PcRelativePatchInfo* info_low = + NewBootImageJniEntrypointPatch(invoke->GetResolvedMethodReference(), info_high); + EmitPcRelativeLdPlaceholder(info_low, RA, RA); + break; + } + FALLTHROUGH_INTENDED; + default: + LoadMethod(invoke->GetMethodLoadKind(), temp, invoke); + break; + } + + switch (invoke->GetCodePtrLocation()) { + case CodePtrLocation::kCallSelf: + DCHECK(!GetGraph()->HasShouldDeoptimizeFlag()); + __ Jal(&frame_entry_label_); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + break; + case CodePtrLocation::kCallArtMethod: + // RA = callee_method->entry_point_from_quick_compiled_code_; + __ Loadd(RA, + callee_method.AsRegister<XRegister>(), + ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize).Int32Value()); + // RA() + __ Jalr(RA); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + break; + case CodePtrLocation::kCallCriticalNative: { + size_t out_frame_size = + PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorRiscv64, + kNativeStackAlignment, + GetCriticalNativeDirectCallFrameSize>(invoke); + if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) { + // Entrypoint is already loaded in RA. + } else { + // RA = callee_method->ptr_sized_fields_.data_; // EntryPointFromJni + MemberOffset offset = ArtMethod::EntryPointFromJniOffset(kRiscv64PointerSize); + __ Loadd(RA, callee_method.AsRegister<XRegister>(), offset.Int32Value()); + } + __ Jalr(RA); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + // The result is returned the same way in native ABI and managed ABI. No result conversion is + // needed, see comments in `Riscv64JniCallingConvention::RequiresSmallResultTypeExtension()`. + if (out_frame_size != 0u) { + DecreaseFrame(out_frame_size); + } + break; + } + } + + DCHECK(!IsLeafMethod()); +} + +void CodeGeneratorRISCV64::MaybeGenerateInlineCacheCheck(HInstruction* instruction, + XRegister klass) { + // We know the destination of an intrinsic, so no need to record inline caches. + if (!instruction->GetLocations()->Intrinsified() && + GetGraph()->IsCompilingBaseline() && + !Runtime::Current()->IsAotCompiler()) { + DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke()); + ProfilingInfo* info = GetGraph()->GetProfilingInfo(); + DCHECK(info != nullptr); + InlineCache* cache = info->GetInlineCache(instruction->GetDexPc()); + uint64_t address = reinterpret_cast64<uint64_t>(cache); + Riscv64Label done; + // The `art_quick_update_inline_cache` expects the inline cache in T5. + XRegister ic_reg = T5; + ScratchRegisterScope srs(GetAssembler()); + DCHECK_EQ(srs.AvailableXRegisters(), 2u); + srs.ExcludeXRegister(ic_reg); + DCHECK_EQ(srs.AvailableXRegisters(), 1u); + __ LoadConst64(ic_reg, address); + { + ScratchRegisterScope srs2(GetAssembler()); + XRegister tmp = srs2.AllocateXRegister(); + __ Loadd(tmp, ic_reg, InlineCache::ClassesOffset().Int32Value()); + // Fast path for a monomorphic cache. + __ Beq(klass, tmp, &done); + } + InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc()); + __ Bind(&done); + } +} + +void CodeGeneratorRISCV64::GenerateVirtualCall(HInvokeVirtual* invoke, + Location temp_location, + SlowPathCode* slow_path) { + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. 
In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. + InvokeDexCallingConvention calling_convention; + XRegister receiver = calling_convention.GetRegisterAt(0); + XRegister temp = temp_location.AsRegister<XRegister>(); + MemberOffset method_offset = + mirror::Class::EmbeddedVTableEntryOffset(invoke->GetVTableIndex(), kRiscv64PointerSize); + MemberOffset class_offset = mirror::Object::ClassOffset(); + Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kRiscv64PointerSize); + + // temp = object->GetClass(); + __ Loadwu(temp, receiver, class_offset.Int32Value()); + MaybeRecordImplicitNullCheck(invoke); + // Instead of simply (possibly) unpoisoning `temp` here, we should + // emit a read barrier for the previous class reference load. + // However this is not required in practice, as this is an + // intermediate/temporary reference and because the current + // concurrent copying collector keeps the from-space memory + // intact/accessible until the end of the marking phase (the + // concurrent copying collector may not in the future). + MaybeUnpoisonHeapReference(temp); + + // If we're compiling baseline, update the inline cache. + MaybeGenerateInlineCacheCheck(invoke, temp); + + // temp = temp->GetMethodAt(method_offset); + __ Loadd(temp, temp, method_offset.Int32Value()); + // RA = temp->GetEntryPoint(); + __ Loadd(RA, temp, entry_point.Int32Value()); + // RA(); + __ Jalr(RA); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); +} + +void CodeGeneratorRISCV64::MoveFromReturnRegister(Location trg, DataType::Type type) { + if (!trg.IsValid()) { + DCHECK_EQ(type, DataType::Type::kVoid); + return; + } + + DCHECK_NE(type, DataType::Type::kVoid); + + if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) { + XRegister trg_reg = trg.AsRegister<XRegister>(); + XRegister res_reg = Riscv64ReturnLocation(type).AsRegister<XRegister>(); + if (trg_reg != res_reg) { + __ Mv(trg_reg, res_reg); + } + } else { + FRegister trg_reg = trg.AsFpuRegister<FRegister>(); + FRegister res_reg = Riscv64ReturnLocation(type).AsFpuRegister<FRegister>(); + if (trg_reg != res_reg) { + __ FMvD(trg_reg, res_reg); // 64-bit move is OK also for `float`. + } + } +} + +void CodeGeneratorRISCV64::PoisonHeapReference(XRegister reg) { + __ Sub(reg, Zero, reg); // Negate the ref. + __ ZextW(reg, reg); // Zero-extend the 32-bit ref. +} + +void CodeGeneratorRISCV64::UnpoisonHeapReference(XRegister reg) { + __ Sub(reg, Zero, reg); // Negate the ref. + __ ZextW(reg, reg); // Zero-extend the 32-bit ref. 
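  // Note: this is intentionally the same sequence as PoisonHeapReference() above; 32-bit
  // negation is its own inverse, so poisoning and unpoisoning are symmetric.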
+} + +inline void CodeGeneratorRISCV64::MaybePoisonHeapReference(XRegister reg) { + if (kPoisonHeapReferences) { + PoisonHeapReference(reg); + } +} + +inline void CodeGeneratorRISCV64::MaybeUnpoisonHeapReference(XRegister reg) { + if (kPoisonHeapReferences) { + UnpoisonHeapReference(reg); + } +} + +void CodeGeneratorRISCV64::SwapLocations(Location loc1, Location loc2, DataType::Type type) { + DCHECK(!loc1.IsConstant()); + DCHECK(!loc2.IsConstant()); + + if (loc1.Equals(loc2)) { + return; + } + + bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot(); + bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot(); + bool is_simd1 = loc1.IsSIMDStackSlot(); + bool is_simd2 = loc2.IsSIMDStackSlot(); + bool is_fp_reg1 = loc1.IsFpuRegister(); + bool is_fp_reg2 = loc2.IsFpuRegister(); + + if ((is_slot1 != is_slot2) || + (loc2.IsRegister() && loc1.IsRegister()) || + (is_fp_reg2 && is_fp_reg1)) { + if ((is_fp_reg2 && is_fp_reg1) && GetGraph()->HasSIMD()) { + LOG(FATAL) << "Unsupported"; + UNREACHABLE(); + } + ScratchRegisterScope srs(GetAssembler()); + Location tmp = (is_fp_reg2 || is_fp_reg1) + ? Location::FpuRegisterLocation(srs.AllocateFRegister()) + : Location::RegisterLocation(srs.AllocateXRegister()); + MoveLocation(tmp, loc1, type); + MoveLocation(loc1, loc2, type); + MoveLocation(loc2, tmp, type); + } else if (is_slot1 && is_slot2) { + move_resolver_.Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), loc1.IsDoubleStackSlot()); + } else if (is_simd1 && is_simd2) { + // TODO(riscv64): Add VECTOR/SIMD later. + UNIMPLEMENTED(FATAL) << "Vector extension is unsupported"; + } else if ((is_fp_reg1 && is_simd2) || (is_fp_reg2 && is_simd1)) { + // TODO(riscv64): Add VECTOR/SIMD later. + UNIMPLEMENTED(FATAL) << "Vector extension is unsupported"; + } else { + LOG(FATAL) << "Unimplemented swap between locations " << loc1 << " and " << loc2; + } +} + +} // namespace riscv64 +} // namespace art diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h index 405b39aa0a..375cec957f 100644 --- a/compiler/optimizing/code_generator_riscv64.h +++ b/compiler/optimizing/code_generator_riscv64.h @@ -17,7 +17,888 @@ #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ +#include "android-base/logging.h" +#include "arch/riscv64/registers_riscv64.h" +#include "base/macros.h" #include "code_generator.h" #include "driver/compiler_options.h" +#include "intrinsics_list.h" +#include "optimizing/locations.h" +#include "parallel_move_resolver.h" +#include "utils/riscv64/assembler_riscv64.h" + +namespace art HIDDEN { +namespace riscv64 { + +// InvokeDexCallingConvention registers +static constexpr XRegister kParameterCoreRegisters[] = {A1, A2, A3, A4, A5, A6, A7}; +static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters); + +static constexpr FRegister kParameterFpuRegisters[] = {FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7}; +static constexpr size_t kParameterFpuRegistersLength = arraysize(kParameterFpuRegisters); + +// InvokeRuntimeCallingConvention registers +static constexpr XRegister kRuntimeParameterCoreRegisters[] = {A0, A1, A2, A3, A4, A5, A6, A7}; +static constexpr size_t kRuntimeParameterCoreRegistersLength = + arraysize(kRuntimeParameterCoreRegisters); + +static constexpr FRegister kRuntimeParameterFpuRegisters[] = { + FA0, FA1, FA2, FA3, FA4, FA5, FA6, FA7 +}; +static constexpr size_t kRuntimeParameterFpuRegistersLength = + arraysize(kRuntimeParameterFpuRegisters); + 
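// The V(...) list below is an X-macro: each consumer passes a one-argument macro as V and
// the list stamps out one expansion per intrinsic name. A minimal, self-contained sketch of
// the idiom, using hypothetical DEMO_* names rather than the actual ART consumer macros:

#define DEMO_INTRINSIC_LIST(V) \
  V(MathCos)                   \
  V(MathSin)

// Declares one empty marker function per listed intrinsic, e.g. DemoMathCosIsUnimplemented().
#define DEMO_DECLARE_MARKER(Name) inline void Demo##Name##IsUnimplemented() {}
DEMO_INTRINSIC_LIST(DEMO_DECLARE_MARKER)
#undef DEMO_DECLARE_MARKER
#undef DEMO_INTRINSIC_LIST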
+#define UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(V) \ + V(IntegerReverse) \ + V(IntegerDivideUnsigned) \ + V(LongReverse) \ + V(LongDivideUnsigned) \ + V(MathFmaDouble) \ + V(MathFmaFloat) \ + V(MathCos) \ + V(MathSin) \ + V(MathAcos) \ + V(MathAsin) \ + V(MathAtan) \ + V(MathAtan2) \ + V(MathPow) \ + V(MathCbrt) \ + V(MathCosh) \ + V(MathExp) \ + V(MathExpm1) \ + V(MathHypot) \ + V(MathLog) \ + V(MathLog10) \ + V(MathNextAfter) \ + V(MathSinh) \ + V(MathTan) \ + V(MathTanh) \ + V(MathSqrt) \ + V(MathCeil) \ + V(MathFloor) \ + V(MathRint) \ + V(MathRoundDouble) \ + V(MathRoundFloat) \ + V(MathMultiplyHigh) \ + V(SystemArrayCopyByte) \ + V(SystemArrayCopyChar) \ + V(SystemArrayCopyInt) \ + V(SystemArrayCopy) \ + V(ThreadCurrentThread) \ + V(FP16Ceil) \ + V(FP16Compare) \ + V(FP16Floor) \ + V(FP16Rint) \ + V(FP16ToFloat) \ + V(FP16ToHalf) \ + V(FP16Greater) \ + V(FP16GreaterEquals) \ + V(FP16Less) \ + V(FP16LessEquals) \ + V(FP16Min) \ + V(FP16Max) \ + V(StringCompareTo) \ + V(StringEquals) \ + V(StringGetCharsNoCheck) \ + V(StringIndexOf) \ + V(StringIndexOfAfter) \ + V(StringStringIndexOf) \ + V(StringStringIndexOfAfter) \ + V(StringNewStringFromBytes) \ + V(StringNewStringFromChars) \ + V(StringNewStringFromString) \ + V(StringBufferAppend) \ + V(StringBufferLength) \ + V(StringBufferToString) \ + V(StringBuilderAppendObject) \ + V(StringBuilderAppendString) \ + V(StringBuilderAppendCharSequence) \ + V(StringBuilderAppendCharArray) \ + V(StringBuilderAppendBoolean) \ + V(StringBuilderAppendChar) \ + V(StringBuilderAppendInt) \ + V(StringBuilderAppendLong) \ + V(StringBuilderAppendFloat) \ + V(StringBuilderAppendDouble) \ + V(StringBuilderLength) \ + V(StringBuilderToString) \ + V(UnsafeCASInt) \ + V(UnsafeCASLong) \ + V(UnsafeCASObject) \ + V(UnsafeGet) \ + V(UnsafeGetVolatile) \ + V(UnsafeGetObject) \ + V(UnsafeGetObjectVolatile) \ + V(UnsafeGetLong) \ + V(UnsafeGetLongVolatile) \ + V(UnsafePut) \ + V(UnsafePutOrdered) \ + V(UnsafePutVolatile) \ + V(UnsafePutObject) \ + V(UnsafePutObjectOrdered) \ + V(UnsafePutObjectVolatile) \ + V(UnsafePutLong) \ + V(UnsafePutLongOrdered) \ + V(UnsafePutLongVolatile) \ + V(UnsafeGetAndAddInt) \ + V(UnsafeGetAndAddLong) \ + V(UnsafeGetAndSetInt) \ + V(UnsafeGetAndSetLong) \ + V(UnsafeGetAndSetObject) \ + V(JdkUnsafeCASInt) \ + V(JdkUnsafeCASLong) \ + V(JdkUnsafeCASObject) \ + V(JdkUnsafeCompareAndSetInt) \ + V(JdkUnsafeCompareAndSetLong) \ + V(JdkUnsafeCompareAndSetObject) \ + V(JdkUnsafeGet) \ + V(JdkUnsafeGetVolatile) \ + V(JdkUnsafeGetAcquire) \ + V(JdkUnsafeGetObject) \ + V(JdkUnsafeGetObjectVolatile) \ + V(JdkUnsafeGetObjectAcquire) \ + V(JdkUnsafeGetLong) \ + V(JdkUnsafeGetLongVolatile) \ + V(JdkUnsafeGetLongAcquire) \ + V(JdkUnsafePut) \ + V(JdkUnsafePutOrdered) \ + V(JdkUnsafePutRelease) \ + V(JdkUnsafePutVolatile) \ + V(JdkUnsafePutObject) \ + V(JdkUnsafePutObjectOrdered) \ + V(JdkUnsafePutObjectVolatile) \ + V(JdkUnsafePutObjectRelease) \ + V(JdkUnsafePutLong) \ + V(JdkUnsafePutLongOrdered) \ + V(JdkUnsafePutLongVolatile) \ + V(JdkUnsafePutLongRelease) \ + V(JdkUnsafeGetAndAddInt) \ + V(JdkUnsafeGetAndAddLong) \ + V(JdkUnsafeGetAndSetInt) \ + V(JdkUnsafeGetAndSetLong) \ + V(JdkUnsafeGetAndSetObject) \ + V(ReferenceGetReferent) \ + V(ReferenceRefersTo) \ + V(IntegerValueOf) \ + V(ThreadInterrupted) \ + V(ReachabilityFence) \ + V(CRC32Update) \ + V(CRC32UpdateBytes) \ + V(CRC32UpdateByteBuffer) \ + V(MethodHandleInvokeExact) \ + V(MethodHandleInvoke) \ + V(VarHandleCompareAndExchange) \ + V(VarHandleCompareAndExchangeAcquire) \ + 
V(VarHandleCompareAndExchangeRelease) \ + V(VarHandleCompareAndSet) \ + V(VarHandleGet) \ + V(VarHandleGetAcquire) \ + V(VarHandleGetAndAdd) \ + V(VarHandleGetAndAddAcquire) \ + V(VarHandleGetAndAddRelease) \ + V(VarHandleGetAndBitwiseAnd) \ + V(VarHandleGetAndBitwiseAndAcquire) \ + V(VarHandleGetAndBitwiseAndRelease) \ + V(VarHandleGetAndBitwiseOr) \ + V(VarHandleGetAndBitwiseOrAcquire) \ + V(VarHandleGetAndBitwiseOrRelease) \ + V(VarHandleGetAndBitwiseXor) \ + V(VarHandleGetAndBitwiseXorAcquire) \ + V(VarHandleGetAndBitwiseXorRelease) \ + V(VarHandleGetAndSet) \ + V(VarHandleGetAndSetAcquire) \ + V(VarHandleGetAndSetRelease) \ + V(VarHandleGetOpaque) \ + V(VarHandleGetVolatile) \ + V(VarHandleSet) \ + V(VarHandleSetOpaque) \ + V(VarHandleSetRelease) \ + V(VarHandleSetVolatile) \ + V(VarHandleWeakCompareAndSet) \ + V(VarHandleWeakCompareAndSetAcquire) \ + V(VarHandleWeakCompareAndSetPlain) \ + V(VarHandleWeakCompareAndSetRelease) + +// Method register on invoke. +static const XRegister kArtMethodRegister = A0; + +class CodeGeneratorRISCV64; + +class InvokeRuntimeCallingConvention : public CallingConvention<XRegister, FRegister> { + public: + InvokeRuntimeCallingConvention() + : CallingConvention(kRuntimeParameterCoreRegisters, + kRuntimeParameterCoreRegistersLength, + kRuntimeParameterFpuRegisters, + kRuntimeParameterFpuRegistersLength, + kRiscv64PointerSize) {} + + Location GetReturnLocation(DataType::Type return_type); + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); +}; + +class InvokeDexCallingConvention : public CallingConvention<XRegister, FRegister> { + public: + InvokeDexCallingConvention() + : CallingConvention(kParameterCoreRegisters, + kParameterCoreRegistersLength, + kParameterFpuRegisters, + kParameterFpuRegistersLength, + kRiscv64PointerSize) {} + + private: + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConvention); +}; + +class InvokeDexCallingConventionVisitorRISCV64 : public InvokeDexCallingConventionVisitor { + public: + InvokeDexCallingConventionVisitorRISCV64() {} + virtual ~InvokeDexCallingConventionVisitorRISCV64() {} + + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; + + private: + InvokeDexCallingConvention calling_convention; + + DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionVisitorRISCV64); +}; + +class CriticalNativeCallingConventionVisitorRiscv64 : public InvokeDexCallingConventionVisitor { + public: + explicit CriticalNativeCallingConventionVisitorRiscv64(bool for_register_allocation) + : for_register_allocation_(for_register_allocation) {} + + virtual ~CriticalNativeCallingConventionVisitorRiscv64() {} + + Location GetNextLocation(DataType::Type type) override; + Location GetReturnLocation(DataType::Type type) const override; + Location GetMethodLocation() const override; + + size_t GetStackOffset() const { return stack_offset_; } + + private: + // Register allocator does not support adjusting frame size, so we cannot provide final locations + // of stack arguments for register allocation. We ask the register allocator for any location and + // move these arguments to the right place after adjusting the SP when generating the call. 
+ const bool for_register_allocation_; + size_t gpr_index_ = 0u; + size_t fpr_index_ = 0u; + size_t stack_offset_ = 0u; + + DISALLOW_COPY_AND_ASSIGN(CriticalNativeCallingConventionVisitorRiscv64); +}; + +class SlowPathCodeRISCV64 : public SlowPathCode { + public: + explicit SlowPathCodeRISCV64(HInstruction* instruction) + : SlowPathCode(instruction), entry_label_(), exit_label_() {} + + Riscv64Label* GetEntryLabel() { return &entry_label_; } + Riscv64Label* GetExitLabel() { return &exit_label_; } + + private: + Riscv64Label entry_label_; + Riscv64Label exit_label_; + + DISALLOW_COPY_AND_ASSIGN(SlowPathCodeRISCV64); +}; + +class ParallelMoveResolverRISCV64 : public ParallelMoveResolverWithSwap { + public: + ParallelMoveResolverRISCV64(ArenaAllocator* allocator, CodeGeneratorRISCV64* codegen) + : ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {} + + void EmitMove(size_t index) override; + void EmitSwap(size_t index) override; + void SpillScratch(int reg) override; + void RestoreScratch(int reg) override; + + void Exchange(int index1, int index2, bool double_slot); + + Riscv64Assembler* GetAssembler() const; + + private: + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolverRISCV64); +}; + +class FieldAccessCallingConventionRISCV64 : public FieldAccessCallingConvention { + public: + FieldAccessCallingConventionRISCV64() {} + + Location GetObjectLocation() const override { + return Location::RegisterLocation(A1); + } + Location GetFieldIndexLocation() const override { + return Location::RegisterLocation(A0); + } + Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + return Location::RegisterLocation(A0); + } + Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, + bool is_instance) const override { + return is_instance + ? 
Location::RegisterLocation(A2) + : Location::RegisterLocation(A1); + } + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + return Location::FpuRegisterLocation(FA0); + } + + private: + DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionRISCV64); +}; + +class LocationsBuilderRISCV64 : public HGraphVisitor { + public: + LocationsBuilderRISCV64(HGraph* graph, CodeGeneratorRISCV64* codegen) + : HGraphVisitor(graph), codegen_(codegen) {} + +#define DECLARE_VISIT_INSTRUCTION(name, super) void Visit##name(H##name* instr) override; + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION + + void VisitInstruction(HInstruction* instruction) override { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " + << instruction->GetId() << ")"; + } + + protected: + void HandleInvoke(HInvoke* invoke); + void HandleBinaryOp(HBinaryOperation* operation); + void HandleCondition(HCondition* instruction); + void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction); + void HandleFieldGet(HInstruction* instruction); + + InvokeDexCallingConventionVisitorRISCV64 parameter_visitor_; + + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(LocationsBuilderRISCV64); +}; + +class InstructionCodeGeneratorRISCV64 : public InstructionCodeGenerator { + public: + InstructionCodeGeneratorRISCV64(HGraph* graph, CodeGeneratorRISCV64* codegen); + +#define DECLARE_VISIT_INSTRUCTION(name, super) void Visit##name(H##name* instr) override; + + FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION) + FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(DECLARE_VISIT_INSTRUCTION) + +#undef DECLARE_VISIT_INSTRUCTION + + void VisitInstruction(HInstruction* instruction) override { + LOG(FATAL) << "Unreachable instruction " << instruction->DebugName() << " (id " + << instruction->GetId() << ")"; + } + + Riscv64Assembler* GetAssembler() const { return assembler_; } + + void GenerateMemoryBarrier(MemBarrierKind kind); + + void ShNAdd(XRegister rd, XRegister rs1, XRegister rs2, DataType::Type type); + + protected: + void GenerateClassInitializationCheck(SlowPathCodeRISCV64* slow_path, XRegister class_reg); + void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, XRegister temp); + void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor); + void HandleBinaryOp(HBinaryOperation* operation); + void HandleCondition(HCondition* instruction); + void HandleShift(HBinaryOperation* operation); + void HandleFieldSet(HInstruction* instruction, + const FieldInfo& field_info, + bool value_can_be_null, + WriteBarrierKind write_barrier_kind); + void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a read barrier and + // shall be a register in that case; it may be an invalid location + // otherwise. + void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). 
+ // + // Location `maybe_temp` is used when generating a Baker's (fast + // path) read barrier and shall be a register in that case; it may + // be an invalid location otherwise. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option); + + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + XRegister obj, + uint32_t offset, + ReadBarrierOption read_barrier_option, + Riscv64Label* label_low = nullptr); + + void GenerateTestAndBranch(HInstruction* instruction, + size_t condition_input_index, + Riscv64Label* true_target, + Riscv64Label* false_target); + void DivRemOneOrMinusOne(HBinaryOperation* instruction); + void DivRemByPowerOfTwo(HBinaryOperation* instruction); + void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); + void GenerateDivRemIntegral(HBinaryOperation* instruction); + void GenerateIntLongCondition(IfCondition cond, LocationSummary* locations); + void GenerateIntLongCompareAndBranch(IfCondition cond, + LocationSummary* locations, + Riscv64Label* label); + void GenerateFpCondition(IfCondition cond, + bool gt_bias, + DataType::Type type, + LocationSummary* locations, + Riscv64Label* label = nullptr); + void HandleGoto(HInstruction* got, HBasicBlock* successor); + void GenPackedSwitchWithCompares(XRegister adjusted, + XRegister temp, + uint32_t num_entries, + HBasicBlock* switch_block); + void GenTableBasedPackedSwitch(XRegister adjusted, + XRegister temp, + uint32_t num_entries, + HBasicBlock* switch_block); + int32_t VecAddress(LocationSummary* locations, + size_t size, + /*out*/ XRegister* adjusted_base); + void GenConditionalMove(HSelect* select); + + template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister, FRegister)> + void FpBinOp(Reg rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FAdd(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FSub(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FDiv(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FMul(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FMin(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FMax(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FEq(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FLt(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + void FLe(XRegister rd, FRegister rs1, FRegister rs2, DataType::Type type); + + template <typename Reg, + void (Riscv64Assembler::*opS)(Reg, FRegister), + void (Riscv64Assembler::*opD)(Reg, FRegister)> + void FpUnOp(Reg rd, FRegister rs1, DataType::Type type); + void FAbs(FRegister rd, FRegister rs1, DataType::Type type); + void FNeg(FRegister rd, FRegister rs1, DataType::Type type); + void FMv(FRegister rd, FRegister rs1, DataType::Type type); + void FClass(XRegister rd, FRegister rs1, DataType::Type type); + + void Load(Location out, XRegister rs1, int32_t offset, DataType::Type type); + void Store(Location value, XRegister rs1, int32_t offset, DataType::Type type); + + Riscv64Assembler* const assembler_; + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorRISCV64); +}; + +class 
CodeGeneratorRISCV64 : public CodeGenerator { + public: + CodeGeneratorRISCV64(HGraph* graph, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats = nullptr); + virtual ~CodeGeneratorRISCV64() {} + + void GenerateFrameEntry() override; + void GenerateFrameExit() override; + + void Bind(HBasicBlock* block) override; + + size_t GetWordSize() const override { + // The "word" for the compiler is the core register size (64-bit for riscv64) while the + // riscv64 assembler uses "word" for 32-bit values and "double word" for 64-bit values. + return kRiscv64DoublewordSize; + } + + bool SupportsPredicatedSIMD() const override { + // TODO(riscv64): Check the vector extension. + return false; + } + + // Get FP register width in bytes for spilling/restoring in the slow paths. + // + // Note: In SIMD graphs this should return SIMD register width as all FP and SIMD registers + // alias and live SIMD registers are forced to be spilled in full size in the slow paths. + size_t GetSlowPathFPWidth() const override { + // Default implementation. + return GetCalleePreservedFPWidth(); + } + + size_t GetCalleePreservedFPWidth() const override { + return kRiscv64FloatRegSizeInBytes; + }; + + size_t GetSIMDRegisterWidth() const override { + // TODO(riscv64): Implement SIMD with the Vector extension. + // Note: HLoopOptimization calls this function even for an ISA without SIMD support. + return kRiscv64FloatRegSizeInBytes; + }; + + uintptr_t GetAddressOf(HBasicBlock* block) override { + return assembler_.GetLabelLocation(GetLabelOf(block)); + }; + + Riscv64Label* GetLabelOf(HBasicBlock* block) const { + return CommonGetLabelOf<Riscv64Label>(block_labels_, block); + } + + void Initialize() override { block_labels_ = CommonInitializeLabels<Riscv64Label>(); } + + void MoveConstant(Location destination, int32_t value) override; + void MoveLocation(Location destination, Location source, DataType::Type dst_type) override; + void AddLocationAsTemp(Location location, LocationSummary* locations) override; + + Riscv64Assembler* GetAssembler() override { return &assembler_; } + const Riscv64Assembler& GetAssembler() const override { return assembler_; } + + HGraphVisitor* GetLocationBuilder() override { return &location_builder_; } + HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; } + + void MaybeGenerateInlineCacheCheck(HInstruction* instruction, XRegister klass); + + void SetupBlockedRegisters() const override; + + size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override; + size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override; + + void DumpCoreRegister(std::ostream& stream, int reg) const override; + void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; + + InstructionSet GetInstructionSet() const override { return InstructionSet::kRiscv64; } + + uint32_t GetPreferredSlotsAlignment() const override { + return static_cast<uint32_t>(kRiscv64PointerSize); + } + + void Finalize() override; + + // Generate code to invoke a runtime entry point. + void InvokeRuntime(QuickEntrypointEnum entrypoint, + HInstruction* instruction, + uint32_t dex_pc, + SlowPathCode* slow_path = nullptr) override; + + // Generate code to invoke a runtime entry point, but do not record + // PC-related information in a stack map. 
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path); + + ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; } + + bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; } + + void IncreaseFrame(size_t adjustment) override; + void DecreaseFrame(size_t adjustment) override; + + void GenerateNop() override; + + void GenerateImplicitNullCheck(HNullCheck* instruction) override; + void GenerateExplicitNullCheck(HNullCheck* instruction) override; + + // Check if the desired_string_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadString::LoadKind GetSupportedLoadStringKind( + HLoadString::LoadKind desired_string_load_kind) override; + + // Check if the desired_class_load_kind is supported. If it is, return it, + // otherwise return a fall-back kind that should be used instead. + HLoadClass::LoadKind GetSupportedLoadClassKind( + HLoadClass::LoadKind desired_class_load_kind) override; + + // Check if the desired_dispatch_info is supported. If it is, return it, + // otherwise return a fall-back info that should be used instead. + HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch( + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, ArtMethod* method) override; + + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + // + // The 20-bit and 12-bit parts of the 32-bit PC-relative offset are patched separately, + // necessitating two patches/infos. There can be more than two patches/infos if the + // instruction supplying the high part is shared with e.g. a slow path, while the low + // part is supplied by separate instructions, e.g.: + // auipc r1, high // patch + // lwu r2, low(r1) // patch + // beqz r2, slow_path + // back: + // ... + // slow_path: + // ... + // sw r2, low(r1) // patch + // j back + struct PcRelativePatchInfo : PatchInfo<Riscv64Label> { + PcRelativePatchInfo(const DexFile* dex_file, + uint32_t off_or_idx, + const PcRelativePatchInfo* info_high) + : PatchInfo<Riscv64Label>(dex_file, off_or_idx), + pc_insn_label(info_high != nullptr ? &info_high->label : &label) { + DCHECK_IMPLIES(info_high != nullptr, info_high->pc_insn_label == &info_high->label); + } + + // Pointer to the info for the high part patch or nullptr if this is the high part patch info. 
+ const Riscv64Label* pc_insn_label; + + private: + PcRelativePatchInfo(PcRelativePatchInfo&& other) = delete; + DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); + }; + + PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageJniEntrypointPatch( + MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); + + PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, + dex::TypeIndex type_index, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewTypeBssEntryPatch(HLoadClass* load_class, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); + + void EmitPcRelativeAuipcPlaceholder(PcRelativePatchInfo* info_high, XRegister out); + void EmitPcRelativeAddiPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1); + void EmitPcRelativeLwuPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1); + void EmitPcRelativeLdPlaceholder(PcRelativePatchInfo* info_low, XRegister rd, XRegister rs1); + + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override; + + Literal* DeduplicateBootImageAddressLiteral(uint64_t address); + void PatchJitRootUse(uint8_t* code, + const uint8_t* roots_data, + const Literal* literal, + uint64_t index_in_table) const; + Literal* DeduplicateJitStringLiteral(const DexFile& dex_file, + dex::StringIndex string_index, + Handle<mirror::String> handle); + Literal* DeduplicateJitClassLiteral(const DexFile& dex_file, + dex::TypeIndex type_index, + Handle<mirror::Class> handle); + void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override; + + void LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke); + void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, + Location temp, + SlowPathCode* slow_path = nullptr) override; + void GenerateVirtualCall(HInvokeVirtual* invoke, + Location temp, + SlowPathCode* slow_path = nullptr) override; + void MoveFromReturnRegister(Location trg, DataType::Type type) override; + + void GenerateMemoryBarrier(MemBarrierKind kind); + + void MaybeIncrementHotness(bool is_frame_entry); + + bool CanUseImplicitSuspendCheck() const; + + + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t offset, + Location temp, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. 
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check); + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier + // and GenerateArrayLoadWithBakerReadBarrier. + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + XRegister obj, + uint32_t offset, + Location index, + Location temp, + bool needs_null_check); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); + + void MarkGCCard(XRegister object, XRegister value, bool value_can_be_null); + + // + // Heap poisoning. + // + + // Poison a heap reference contained in `reg`. + void PoisonHeapReference(XRegister reg); + + // Unpoison a heap reference contained in `reg`. + void UnpoisonHeapReference(XRegister reg); + + // Poison a heap reference contained in `reg` if heap poisoning is enabled. + void MaybePoisonHeapReference(XRegister reg); + + // Unpoison a heap reference contained in `reg` if heap poisoning is enabled. 
+ void MaybeUnpoisonHeapReference(XRegister reg); + + void SwapLocations(Location loc1, Location loc2, DataType::Type type); + + private: + using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; + using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, Literal*>; + using StringToLiteralMap = + ArenaSafeMap<StringReference, Literal*, StringReferenceValueComparator>; + using TypeToLiteralMap = ArenaSafeMap<TypeReference, Literal*, TypeReferenceValueComparator>; + + Literal* DeduplicateUint32Literal(uint32_t value); + Literal* DeduplicateUint64Literal(uint64_t value); + + PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file, + uint32_t offset_or_index, + const PcRelativePatchInfo* info_high, + ArenaDeque<PcRelativePatchInfo>* patches); + + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<linker::LinkerPatch>* linker_patches); + + Riscv64Assembler assembler_; + LocationsBuilderRISCV64 location_builder_; + InstructionCodeGeneratorRISCV64 instruction_visitor_; + Riscv64Label frame_entry_label_; + + // Labels for each block that will be compiled. + Riscv64Label* block_labels_; // Indexed by block id. + + ParallelMoveResolverRISCV64 move_resolver_; + + // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. + Uint32ToLiteralMap uint32_literals_; + // Deduplication map for 64-bit literals, used for non-patchable method address or method code + // address. + Uint64ToLiteralMap uint64_literals_; + + // PC-relative method patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; + // PC-relative method patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; + // PC-relative type patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; + // PC-relative type patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; + // PC-relative public type patch info for kBssEntryPublic. + ArenaDeque<PcRelativePatchInfo> public_type_bss_entry_patches_; + // PC-relative package type patch info for kBssEntryPackage. + ArenaDeque<PcRelativePatchInfo> package_type_bss_entry_patches_; + // PC-relative String patch info for kBootImageLinkTimePcRelative. + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; + // PC-relative String patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; + // PC-relative method patch info for kBootImageLinkTimePcRelative+kCallCriticalNative. + ArenaDeque<PcRelativePatchInfo> boot_image_jni_entrypoint_patches_; + // PC-relative patch info for IntrinsicObjects for the boot image, + // and for method/type/string patches for kBootImageRelRo otherwise. + ArenaDeque<PcRelativePatchInfo> boot_image_other_patches_; + + // Patches for string root accesses in JIT compiled code. + StringToLiteralMap jit_string_patches_; + // Patches for class root accesses in JIT compiled code. 
+ TypeToLiteralMap jit_class_patches_; +}; + +} // namespace riscv64 +} // namespace art #endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_RISCV64_H_ diff --git a/compiler/optimizing/code_generator_vector_arm64_neon.cc b/compiler/optimizing/code_generator_vector_arm64_neon.cc index 6b6e25cf0c..848b5e7567 100644 --- a/compiler/optimizing/code_generator_vector_arm64_neon.cc +++ b/compiler/optimizing/code_generator_vector_arm64_neon.cc @@ -61,10 +61,8 @@ inline bool NEONCanEncodeConstantAsImmediate(HConstant* constant, HInstruction* // - constant location - if 'constant' is an actual constant and its value can be // encoded into the instruction. // - register location otherwise. -inline Location NEONEncodableConstantOrRegister(HInstruction* constant, - HInstruction* instr) { - if (constant->IsConstant() - && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { +inline Location NEONEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { + if (constant->IsConstant() && NEONCanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { return Location::ConstantLocation(constant); } @@ -1533,12 +1531,32 @@ void InstructionCodeGeneratorARM64Neon::VisitVecPredWhile(HVecPredWhile* instruc UNREACHABLE(); } -void LocationsBuilderARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } -void InstructionCodeGeneratorARM64Neon::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorARM64Neon::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderARM64Neon::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorARM64Neon::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderARM64Neon::VisitVecPredNot(HVecPredNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorARM64Neon::VisitVecPredNot(HVecPredNot* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } diff --git a/compiler/optimizing/code_generator_vector_arm64_sve.cc b/compiler/optimizing/code_generator_vector_arm64_sve.cc index fe15791d3f..ef79932899 100644 --- a/compiler/optimizing/code_generator_vector_arm64_sve.cc +++ b/compiler/optimizing/code_generator_vector_arm64_sve.cc @@ -62,8 +62,7 @@ static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* i // encoded into the instruction. // - register location otherwise. 
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { - if (constant->IsConstant() - && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { + if (constant->IsConstant() && SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { return Location::ConstantLocation(constant); } @@ -246,7 +245,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const VRegister dst = DRegisterFrom(locations->Out()); - const PRegister p_reg = LoopPReg(); + const PRegister p_reg = GetVecGoverningPReg(instruction); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kInt32: @@ -284,7 +283,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); DataType::Type from = instruction->GetInputType(); DataType::Type to = instruction->GetResultType(); ValidateVectorLength(instruction); @@ -304,7 +303,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: @@ -342,7 +341,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kInt8: @@ -378,7 +377,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister src = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: // special case boolean-not @@ -438,7 +437,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: @@ -497,7 +496,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = 
ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: @@ -546,7 +545,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kUint8: @@ -585,7 +584,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); // Note: VIXL guarantees StrictNaNPropagation for Fdiv. @@ -633,7 +632,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: @@ -678,7 +677,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: @@ -714,7 +713,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) { const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister rhs = ZRegisterFrom(locations->InAt(1)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: @@ -769,7 +768,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -802,7 +801,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM 
p_reg = GetVecGoverningPReg(instruction).Merging(); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -835,7 +834,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) { LocationSummary* locations = instruction->GetLocations(); const ZRegister lhs = ZRegisterFrom(locations->InAt(0)); const ZRegister dst = ZRegisterFrom(locations->Out()); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -966,7 +965,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate( const ZRegister acc = ZRegisterFrom(locations->InAt(0)); const ZRegister left = ZRegisterFrom(locations->InAt(1)); const ZRegister right = ZRegisterFrom(locations->InAt(2)); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); DCHECK(locations->InAt(0).Equals(locations->Out())); ValidateVectorLength(instruction); @@ -1029,7 +1028,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction) const ZRegister acc = ZRegisterFrom(locations->InAt(0)); const ZRegister left = ZRegisterFrom(locations->InAt(1)); const ZRegister right = ZRegisterFrom(locations->InAt(2)); - const PRegisterM p_reg = LoopPReg().Merging(); + const PRegisterM p_reg = GetVecGoverningPReg(instruction).Merging(); HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), @@ -1099,7 +1098,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) { const ZRegister reg = ZRegisterFrom(locations->Out()); UseScratchRegisterScope temps(GetVIXLAssembler()); Register scratch; - const PRegisterZ p_reg = LoopPReg().Zeroing(); + const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -1141,7 +1140,7 @@ void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) { const ZRegister reg = ZRegisterFrom(locations->InAt(2)); UseScratchRegisterScope temps(GetVIXLAssembler()); Register scratch; - const PRegisterZ p_reg = LoopPReg().Zeroing(); + const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing(); ValidateVectorLength(instruction); switch (instruction->GetPackedType()) { @@ -1182,25 +1181,25 @@ void LocationsBuilderARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) { void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) { // Instruction is not predicated, see nodes_vector.h DCHECK(!instruction->IsPredicated()); - const PRegister p_reg = LoopPReg(); + const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction); switch (instruction->GetPackedType()) { case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - __ Ptrue(p_reg.VnB(), vixl::aarch64::SVE_ALL); + __ Ptrue(output_p_reg.VnB(), vixl::aarch64::SVE_ALL); break; case DataType::Type::kUint16: case DataType::Type::kInt16: - __ Ptrue(p_reg.VnH(), vixl::aarch64::SVE_ALL); + __ Ptrue(output_p_reg.VnH(), vixl::aarch64::SVE_ALL); break; case DataType::Type::kInt32: case DataType::Type::kFloat32: - __ Ptrue(p_reg.VnS(), vixl::aarch64::SVE_ALL); + __ 
Ptrue(output_p_reg.VnS(), vixl::aarch64::SVE_ALL); break; case DataType::Type::kInt64: case DataType::Type::kFloat64: - __ Ptrue(p_reg.VnD(), vixl::aarch64::SVE_ALL); + __ Ptrue(output_p_reg.VnD(), vixl::aarch64::SVE_ALL); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -1208,6 +1207,67 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instru } } +void LocationsBuilderARM64Sve::VisitVecCondition(HVecCondition* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM64Sve::VisitVecCondition(HVecCondition* instruction) { + DCHECK(instruction->IsPredicated()); + LocationSummary* locations = instruction->GetLocations(); + const ZRegister left = ZRegisterFrom(locations->InAt(0)); + const ZRegister right = ZRegisterFrom(locations->InAt(1)); + const PRegisterZ p_reg = GetVecGoverningPReg(instruction).Zeroing(); + const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction); + + HVecOperation* a = instruction->InputAt(0)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(1)->AsVecOperation(); + DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), + HVecOperation::ToSignedType(b->GetPackedType())); + ValidateVectorLength(instruction); + + // TODO: Support other condition OPs and types. + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + __ Cmpeq(output_p_reg.VnB(), p_reg, left.VnB(), right.VnB()); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + __ Cmpeq(output_p_reg.VnH(), p_reg, left.VnH(), right.VnH()); + break; + case DataType::Type::kInt32: + __ Cmpeq(output_p_reg.VnS(), p_reg, left.VnS(), right.VnS()); + break; + case DataType::Type::kInt64: + __ Cmpeq(output_p_reg.VnD(), p_reg, left.VnD(), right.VnD()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); + UNREACHABLE(); + } +} + +void LocationsBuilderARM64Sve::VisitVecPredNot(HVecPredNot* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DCHECK(instruction->InputAt(0)->IsVecPredSetOperation()); + locations->SetInAt(0, Location::NoLocation()); + locations->SetOut(Location::RequiresRegister()); +} + +void InstructionCodeGeneratorARM64Sve::VisitVecPredNot(HVecPredNot* instruction) { + DCHECK(instruction->IsPredicated()); + + const PRegister input_p_reg = GetVecPredSetFixedOutPReg( + instruction->InputAt(0)->AsVecPredSetOperation()); + const PRegister control_p_reg = GetVecGoverningPReg(instruction); + const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction); + + __ Not(output_p_reg.VnB(), control_p_reg.Zeroing(), input_p_reg.VnB()); +} + void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::RequiresRegister()); @@ -1218,8 +1278,8 @@ void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) { // Semantically, the out location of this instruction and predicate inputs locations of // its users should be a fixed predicate register (similar to // Location::RegisterLocation(int reg)). 
But the register allocator (RA) doesn't support - // SIMD regs (e.g. predicate), so LoopPReg() is used explicitly without exposing it - // to the RA. + // SIMD regs (e.g. predicate), so fixed registers are used explicitly without exposing it + // to the RA (through GetVecPredSetFixedOutPReg()). // // To make the RA happy Location::NoLocation() was used for all the vector instructions // predicate inputs; but for the PredSetOperations (e.g. VecPredWhile) Location::NoLocation() @@ -1241,21 +1301,22 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruct DCHECK(instruction->GetCondKind() == HVecPredWhile::CondKind::kLO); Register left = InputRegisterAt(instruction, 0); Register right = InputRegisterAt(instruction, 1); + const PRegister output_p_reg = GetVecPredSetFixedOutPReg(instruction); DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % instruction->GetVectorLength(), 0u); switch (codegen_->GetSIMDRegisterWidth() / instruction->GetVectorLength()) { case 1u: - __ Whilelo(LoopPReg().VnB(), left, right); + __ Whilelo(output_p_reg.VnB(), left, right); break; case 2u: - __ Whilelo(LoopPReg().VnH(), left, right); + __ Whilelo(output_p_reg.VnH(), left, right); break; case 4u: - __ Whilelo(LoopPReg().VnS(), left, right); + __ Whilelo(output_p_reg.VnS(), left, right); break; case 8u: - __ Whilelo(LoopPReg().VnD(), left, right); + __ Whilelo(output_p_reg.VnD(), left, right); break; default: LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType(); @@ -1263,20 +1324,20 @@ void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruct } } -void LocationsBuilderARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::NoLocation()); // Result of the operation - a boolean value in a core register. locations->SetOut(Location::RequiresRegister()); } -void InstructionCodeGeneratorARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorARM64Sve::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { // Instruction is not predicated, see nodes_vector.h DCHECK(!instruction->IsPredicated()); Register reg = OutputRegister(instruction); - // Currently VecPredCondition is only used as part of vectorized loop check condition + // Currently VecPredToBoolean is only used as part of vectorized loop check condition // evaluation. 
- DCHECK(instruction->GetPCondKind() == HVecPredCondition::PCondKind::kNFirst); + DCHECK(instruction->GetPCondKind() == HVecPredToBoolean::PCondKind::kNFirst); __ Cset(reg, pl); } diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index e8ecf28386..70f22af17b 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -1069,12 +1069,32 @@ void InstructionCodeGeneratorARMVIXL::VisitVecPredWhile(HVecPredWhile* instructi UNREACHABLE(); } -void LocationsBuilderARMVIXL::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderARMVIXL::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } -void InstructionCodeGeneratorARMVIXL::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorARMVIXL::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderARMVIXL::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderARMVIXL::VisitVecPredNot(HVecPredNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecPredNot(HVecPredNot* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 343a6e1af4..1f9b2578ac 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -1401,12 +1401,32 @@ void InstructionCodeGeneratorX86::VisitVecPredWhile(HVecPredWhile* instruction) UNREACHABLE(); } -void LocationsBuilderX86::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderX86::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } -void InstructionCodeGeneratorX86::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorX86::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderX86::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorX86::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderX86::VisitVecPredNot(HVecPredNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorX86::VisitVecPredNot(HVecPredNot* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index fb6e4e753f..47afa3b4a1 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -1374,12 +1374,32 @@ void 
InstructionCodeGeneratorX86_64::VisitVecPredWhile(HVecPredWhile* instructio UNREACHABLE(); } -void LocationsBuilderX86_64::VisitVecPredCondition(HVecPredCondition* instruction) { +void LocationsBuilderX86_64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } -void InstructionCodeGeneratorX86_64::VisitVecPredCondition(HVecPredCondition* instruction) { +void InstructionCodeGeneratorX86_64::VisitVecPredToBoolean(HVecPredToBoolean* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderX86_64::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorX86_64::VisitVecCondition(HVecCondition* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void LocationsBuilderX86_64::VisitVecPredNot(HVecPredNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + UNREACHABLE(); +} + +void InstructionCodeGeneratorX86_64::VisitVecPredNot(HVecPredNot* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); UNREACHABLE(); } diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index cb1cecc45a..b8c8d9f73d 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -27,6 +27,7 @@ #include "heap_poisoning.h" #include "interpreter/mterp/nterp.h" #include "intrinsics.h" +#include "intrinsics_list.h" #include "intrinsics_utils.h" #include "intrinsics_x86.h" #include "jit/profiling_info.h" @@ -38,6 +39,7 @@ #include "optimizing/nodes.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" +#include "trace.h" #include "utils/assembler.h" #include "utils/stack_checks.h" #include "utils/x86/assembler_x86.h" @@ -839,7 +841,8 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) || - (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) || + (instruction_->AsInvoke()->GetIntrinsic() == + Intrinsics::kJdkUnsafeGetObjectVolatile) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire)) << instruction_->AsInvoke()->GetIntrinsic(); DCHECK_EQ(offset_, 0U); @@ -1107,6 +1110,7 @@ void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) { } namespace detail { + // Mark which intrinsics we don't have handcrafted code for. template <Intrinsics T> struct IsUnimplemented { @@ -1121,15 +1125,13 @@ struct IsUnimplemented { UNIMPLEMENTED_INTRINSIC_LIST_X86(TRUE_OVERRIDE) #undef TRUE_OVERRIDE -#include "intrinsics_list.h" static constexpr bool kIsIntrinsicUnimplemented[] = { - false, // kNone + false, // kNone #define IS_UNIMPLEMENTED(Intrinsic, ...) 
\ - IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, - INTRINSICS_LIST(IS_UNIMPLEMENTED) + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) #undef IS_UNIMPLEMENTED }; -#undef INTRINSICS_LIST } // namespace detail @@ -1140,8 +1142,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters, kNumberOfRegisterPairs, - ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), - arraysize(kCoreCalleeSaves)) + ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves)) | (1 << kFakeReturnRegister), 0, compiler_options, @@ -1221,12 +1222,18 @@ void LocationsBuilderX86::VisitMethodExitHook(HMethodExitHook* method_hook) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); SetInForReturnValue(method_hook, locations); + // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX. + locations->AddTemp(Location::RegisterLocation(EAX)); + locations->AddTemp(Location::RegisterLocation(EDX)); + // An additional temporary register to hold address to store the timestamp counter. + locations->AddTemp(Location::RequiresRegister()); } void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* instruction) { SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); + LocationSummary* locations = instruction->GetLocations(); if (instruction->IsMethodExitHook()) { // Check if we are required to check if the caller needs a deoptimization. Strictly speaking it @@ -1242,8 +1249,51 @@ void InstructionCodeGeneratorX86::GenerateMethodEntryExitHook(HInstruction* inst MemberOffset offset = instruction->IsMethodExitHook() ? instrumentation::Instrumentation::HaveMethodExitListenersOffset() : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); - __ cmpb(Address::Absolute(address + offset.Int32Value()), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ cmpb(Address::Absolute(address + offset.Int32Value()), + Immediate(instrumentation::Instrumentation::kFastTraceListeners)); + // Check if there are any trace method entry / exit listeners. If no, continue. + __ j(kLess, slow_path->GetExitLabel()); + // Check if there are any slow (jvmti / trace with thread cpu time) method entry / exit listeners. + // If yes, just take the slow path. + __ j(kGreater, slow_path->GetEntryLabel()); + + // For entry_addr use the first temp that isn't EAX or EDX. We need this after + // rdtsc which returns values in EAX + EDX. + Register entry_addr = locations->GetTemp(2).AsRegister<Register>(); + Register index = locations->GetTemp(1).AsRegister<Register>(); + + // Check if there is place in the buffer for a new entry, if no, take slow path. + uint32_t trace_buffer_ptr = Thread::TraceBufferPtrOffset<kX86PointerSize>().Int32Value(); + uint64_t trace_buffer_index_offset = + Thread::TraceBufferIndexOffset<kX86PointerSize>().Int32Value(); + + __ fs()->movl(index, Address::Absolute(trace_buffer_index_offset)); + __ subl(index, Immediate(kNumEntriesForWallClock)); + __ j(kLess, slow_path->GetEntryLabel()); + + // Update the index in the `Thread`. + __ fs()->movl(Address::Absolute(trace_buffer_index_offset), index); + // Calculate the entry address in the buffer. 
+ // entry_addr = base_addr + sizeof(void*) * index + __ fs()->movl(entry_addr, Address::Absolute(trace_buffer_ptr)); + __ leal(entry_addr, Address(entry_addr, index, TIMES_4, 0)); + + // Record method pointer and trace action. + Register method = index; + __ movl(method, Address(ESP, kCurrentMethodStackOffset)); + // Use last two bits to encode trace method action. For MethodEntry it is 0 + // so no need to set the bits since they are 0 already. + if (instruction->IsMethodExitHook()) { + DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4)); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1); + __ orl(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit))); + } + __ movl(Address(entry_addr, kMethodOffsetInBytes), method); + // Get the timestamp. rdtsc returns timestamp in EAX + EDX. + __ rdtsc(); + __ movl(Address(entry_addr, kTimestampOffsetInBytes), EAX); + __ movl(Address(entry_addr, kHighTimestampOffsetInBytes), EDX); __ Bind(slow_path->GetExitLabel()); } @@ -1254,7 +1304,13 @@ void InstructionCodeGeneratorX86::VisitMethodExitHook(HMethodExitHook* instructi } void LocationsBuilderX86::VisitMethodEntryHook(HMethodEntryHook* method_hook) { - new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + // We use rdtsc to obtain a timestamp for tracing. rdtsc returns the results in EAX + EDX. + locations->AddTemp(Location::RegisterLocation(EAX)); + locations->AddTemp(Location::RegisterLocation(EDX)); + // An additional temporary register to hold address to store the timestamp counter. + locations->AddTemp(Location::RequiresRegister()); } void InstructionCodeGeneratorX86::VisitMethodEntryHook(HMethodEntryHook* instruction) { @@ -1865,8 +1921,7 @@ void LocationsBuilderX86::VisitExit(HExit* exit) { exit->SetLocations(nullptr); } -void InstructionCodeGeneratorX86::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} +void InstructionCodeGeneratorX86::VisitExit([[maybe_unused]] HExit* exit) {} template<class LabelType> void InstructionCodeGeneratorX86::GenerateFPJumps(HCondition* cond, @@ -1981,7 +2036,7 @@ void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double) { - HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTable(); + HX86LoadFromConstantTable* const_area = insn->InputAt(1)->AsX86LoadFromConstantTableOrNull(); if (is_double) { if (rhs.IsFpuRegister()) { __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), rhs.AsFpuRegister<XmmRegister>()); @@ -2506,7 +2561,7 @@ void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitIntConstant([[maybe_unused]] HIntConstant* constant) { // Will be generated at use site. } @@ -2516,7 +2571,7 @@ void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitNullConstant([[maybe_unused]] HNullConstant* constant) { // Will be generated at use site. 
} @@ -2526,7 +2581,7 @@ void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitLongConstant([[maybe_unused]] HLongConstant* constant) { // Will be generated at use site. } @@ -2536,7 +2591,7 @@ void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) { // Will be generated at use site. } @@ -2546,7 +2601,7 @@ void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86::VisitDoubleConstant(HDoubleConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitDoubleConstant([[maybe_unused]] HDoubleConstant* constant) { // Will be generated at use site. } @@ -2555,7 +2610,7 @@ void LocationsBuilderX86::VisitConstructorFence(HConstructorFence* constructor_f } void InstructionCodeGeneratorX86::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + [[maybe_unused]] HConstructorFence* constructor_fence) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -2571,7 +2626,7 @@ void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } -void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) { codegen_->GenerateFrameExit(); } @@ -2954,10 +3009,10 @@ void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) { constant_area)); __ xorps(out.AsFpuRegister<XmmRegister>(), mask); } else { - __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), - neg->GetBaseMethodAddress(), - constant_area)); - __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); + __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), + neg->GetBaseMethodAddress(), + constant_area)); + __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); } } @@ -5086,8 +5141,7 @@ void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) { } void InstructionCodeGeneratorX86::VisitParameterValue( - HParameterValue* instruction ATTRIBUTE_UNUSED) { -} + [[maybe_unused]] HParameterValue* instruction) {} void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) { LocationSummary* locations = @@ -5095,7 +5149,7 @@ void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) { locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument)); } -void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitCurrentMethod([[maybe_unused]] HCurrentMethod* instruction) { } void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) { @@ -5294,7 +5348,7 @@ void LocationsBuilderX86::VisitPhi(HPhi* instruction) { locations->SetOut(Location::Any()); } -void InstructionCodeGeneratorX86::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitPhi([[maybe_unused]] HPhi* instruction) { LOG(FATAL) << "Unreachable"; } @@ -5323,8 +5377,8 @@ void 
CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - ArtMethod* method ATTRIBUTE_UNUSED) { + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, + [[maybe_unused]] ArtMethod* method) { return desired_dispatch_info; } @@ -6749,7 +6803,7 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { } } -void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderX86::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { LOG(FATAL) << "Unreachable"; } @@ -7213,9 +7267,8 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE Register out = out_loc.AsRegister<Register>(); bool generate_null_check = false; - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : gCompilerReadBarrierOption; + const ReadBarrierOption read_barrier_option = + cls->IsInBootImage() ? kWithoutReadBarrier : GetCompilerReadBarrierOption(); switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { DCHECK(!cls->CanCallRuntime()); @@ -7445,7 +7498,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Address address = Address(method_address, CodeGeneratorX86::kPlaceholder32BitOffset); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, GetCompilerReadBarrierOption()); // No need for memory fence, thanks to the x86 memory model. SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load); codegen_->AddSlowPath(slow_path); @@ -7465,14 +7518,13 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Label* fixup_label = codegen_->NewJitRootStringPatch( load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, GetCompilerReadBarrierOption()); return; } default: break; } - // TODO: Re-add the compiler code to do string dex cache lookup again. InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex().index_)); @@ -7498,7 +7550,7 @@ void LocationsBuilderX86::VisitClearException(HClearException* clear) { new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } -void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitClearException([[maybe_unused]] HClearException* clear) { __ fs()->movl(GetExceptionTlsAddress(), Immediate(0)); } @@ -7840,7 +7892,6 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { } else { locations->SetInAt(1, Location::Any()); } - // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. 
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -8028,11 +8079,11 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - kWithoutReadBarrier); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset)); // Maybe poison the `cls` for direct comparison with memory. @@ -8584,12 +8635,12 @@ void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction, __ Bind(slow_path->GetExitLabel()); } -void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } @@ -8782,13 +8833,15 @@ void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromCons case DataType::Type::kFloat32: __ movss(out.AsFpuRegister<XmmRegister>(), codegen_->LiteralFloatAddress( - value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); + value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); break; case DataType::Type::kFloat64: __ movsd(out.AsFpuRegister<XmmRegister>(), codegen_->LiteralDoubleAddress( - value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); + value->AsDoubleConstant()->GetValue(), + insn->GetBaseMethodAddress(), + const_area)); break; case DataType::Type::kInt32: @@ -8877,7 +8930,7 @@ class JumpTableRIPFixup : public RIPFixup { const HX86PackedSwitch* switch_instr_; }; -void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { +void CodeGeneratorX86::Finalize() { // Generate the constant area if needed. X86Assembler* assembler = GetAssembler(); @@ -8897,7 +8950,7 @@ void CodeGeneratorX86::Finalize(CodeAllocator* allocator) { } // And finish up. - CodeGenerator::Finalize(allocator); + CodeGenerator::Finalize(); } Address CodeGeneratorX86::LiteralDoubleAddress(double v, @@ -8968,9 +9021,9 @@ Address CodeGeneratorX86::ArrayAddress(Register obj, Location index, ScaleFactor scale, uint32_t data_offset) { - return index.IsConstant() ? - Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) : - Address(obj, index.AsRegister<Register>(), scale, data_offset); + return index.IsConstant() + ? 
Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) + : Address(obj, index.AsRegister<Register>(), scale, data_offset); } Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr, @@ -9025,7 +9078,7 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code, reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t; reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = - dchecked_integral_cast<uint32_t>(address); + dchecked_integral_cast<uint32_t>(address); } void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { @@ -9042,13 +9095,13 @@ void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_da } } -void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { +void LocationsBuilderX86::VisitIntermediateAddress( + [[maybe_unused]] HIntermediateAddress* instruction) { LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86::VisitIntermediateAddress( + [[maybe_unused]] HIntermediateAddress* instruction) { LOG(FATAL) << "Unreachable"; } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index d27155f31d..aa25528e08 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -196,7 +196,7 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention { ? Location::RegisterLocation(EDX) : Location::RegisterLocation(ECX)); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override { return Location::FpuRegisterLocation(XMM0); } @@ -635,7 +635,7 @@ class CodeGeneratorX86 : public CodeGenerator { Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value); - void Finalize(CodeAllocator* allocator) override; + void Finalize() override; // Fast path implementation of ReadBarrier::Barrier for a heap // reference field load when Baker's read barriers are used. 
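
Aside: the method entry/exit hook changes to code_generator_x86.cc above emit a fast path that appends a record to a per-thread trace buffer. Below is a minimal, self-contained C++ sketch of that record and of the pointer-tagging trick the emitted `orl` relies on. The struct, its field names and WriteTraceEntry() are invented for illustration only; the generated code works with raw offsets (kMethodOffsetInBytes, kTimestampOffsetInBytes, kHighTimestampOffsetInBytes) and the EAX/EDX halves returned by rdtsc rather than with a C++ struct.

#include <cstdint>
#include <cstdio>

// Illustrative sketch, not part of this change.
enum TraceAction : uint32_t {
  kTraceMethodEnter = 0,  // encoded in the low bits of the method pointer
  kTraceMethodExit = 1,
};

struct TraceEntry32 {           // assumed layout of one buffer record on 32-bit x86
  uint32_t method_and_action;   // ArtMethod* (at least 4-byte aligned) | action
  uint32_t timestamp_lo;        // rdtsc low half (EAX)
  uint32_t timestamp_hi;        // rdtsc high half (EDX)
};

void WriteTraceEntry(TraceEntry32* entry, uintptr_t method, TraceAction action,
                     uint32_t tsc_lo, uint32_t tsc_hi) {
  // ArtMethod alignment (>= 4) keeps the two low bits free, so they can carry
  // the enter/exit action; this is what the emitted `orl` does for exit hooks.
  entry->method_and_action = static_cast<uint32_t>(method) | action;
  entry->timestamp_lo = tsc_lo;
  entry->timestamp_hi = tsc_hi;
}

int main() {
  TraceEntry32 e;
  WriteTraceEntry(&e, 0x1000, kTraceMethodExit, 42, 0);
  std::printf("method=%#x action=%u timestamp=%llu\n",
              e.method_and_action & ~3u, e.method_and_action & 3u,
              (static_cast<unsigned long long>(e.timestamp_hi) << 32) | e.timestamp_lo);
  return 0;
}

Splitting the timestamp into two 32-bit stores matches the 32-bit target; the x86-64 variant in the next file folds the two rdtsc halves into a single 64-bit store instead.
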
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index eea6b204fa..f61a1f04c3 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -27,6 +27,7 @@ #include "heap_poisoning.h" #include "interpreter/mterp/nterp.h" #include "intrinsics.h" +#include "intrinsics_list.h" #include "intrinsics_utils.h" #include "intrinsics_x86_64.h" #include "jit/profiling_info.h" @@ -39,6 +40,7 @@ #include "optimizing/nodes.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" +#include "trace.h" #include "utils/assembler.h" #include "utils/stack_checks.h" #include "utils/x86_64/assembler_x86_64.h" @@ -856,7 +858,8 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) || - (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) || + (instruction_->AsInvoke()->GetIntrinsic() == + Intrinsics::kJdkUnsafeGetObjectVolatile) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire)) << instruction_->AsInvoke()->GetIntrinsic(); DCHECK_EQ(offset_, 0U); @@ -1070,8 +1073,8 @@ void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* location } HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch( - const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, - ArtMethod* method ATTRIBUTE_UNUSED) { + const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, + [[maybe_unused]] ArtMethod* method) { return desired_dispatch_info; } @@ -1495,6 +1498,7 @@ void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) { } namespace detail { + // Mark which intrinsics we don't have handcrafted code for. template <Intrinsics T> struct IsUnimplemented { @@ -1509,15 +1513,13 @@ struct IsUnimplemented { UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE) #undef TRUE_OVERRIDE -#include "intrinsics_list.h" static constexpr bool kIsIntrinsicUnimplemented[] = { - false, // kNone + false, // kNone #define IS_UNIMPLEMENTED(Intrinsic, ...) 
\ - IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, - INTRINSICS_LIST(IS_UNIMPLEMENTED) + IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented, + ART_INTRINSICS_LIST(IS_UNIMPLEMENTED) #undef IS_UNIMPLEMENTED }; -#undef INTRINSICS_LIST } // namespace detail @@ -1531,11 +1533,9 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, kNumberOfCpuRegisters, kNumberOfFloatRegisters, kNumberOfCpuRegisterPairs, - ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves), - arraysize(kCoreCalleeSaves)) + ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves)) | (1 << kFakeReturnRegister), - ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves), - arraysize(kFpuCalleeSaves)), + ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)), compiler_options, stats, ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)), @@ -1585,12 +1585,18 @@ static dwarf::Reg DWARFReg(FloatRegister reg) { } void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) { - new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) + LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); + // We use rdtsc to record the timestamp for method profiling. rdtsc returns + // two 32-bit values in EAX + EDX even on 64-bit architectures. + locations->AddTemp(Location::RegisterLocation(RAX)); + locations->AddTemp(Location::RegisterLocation(RDX)); } void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) { SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction); + LocationSummary* locations = instruction->GetLocations(); codegen_->AddSlowPath(slow_path); if (instruction->IsMethodExitHook()) { @@ -1609,8 +1615,51 @@ void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* i instrumentation::Instrumentation::HaveMethodExitListenersOffset() : instrumentation::Instrumentation::HaveMethodEntryListenersOffset(); __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value())); - __ cmpb(Address(CpuRegister(TMP), 0), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); + __ cmpb(Address(CpuRegister(TMP), 0), + Immediate(instrumentation::Instrumentation::kFastTraceListeners)); + // Check if there are any method entry / exit listeners. If no, continue with execution. + __ j(kLess, slow_path->GetExitLabel()); + // Check if there are any slow method entry / exit listeners. If yes, take the slow path. + __ j(kGreater, slow_path->GetEntryLabel()); + + // Check if there is place in the buffer for a new entry, if no, take slow path. + CpuRegister index = locations->GetTemp(0).AsRegister<CpuRegister>(); + CpuRegister entry_addr = CpuRegister(TMP); + uint64_t trace_buffer_index_offset = + Thread::TraceBufferIndexOffset<kX86_64PointerSize>().SizeValue(); + __ gs()->movq(CpuRegister(index), + Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true)); + __ subq(CpuRegister(index), Immediate(kNumEntriesForWallClock)); + __ j(kLess, slow_path->GetEntryLabel()); + + // Update the index in the `Thread`. + __ gs()->movq(Address::Absolute(trace_buffer_index_offset, /* no_rip= */ true), + CpuRegister(index)); + // Calculate the entry address in the buffer. 
+ // entry_addr = base_addr + sizeof(void*) * index + __ gs()->movq(entry_addr, + Address::Absolute(Thread::TraceBufferPtrOffset<kX86_64PointerSize>().SizeValue(), + /* no_rip= */ true)); + __ leaq(CpuRegister(entry_addr), + Address(CpuRegister(entry_addr), CpuRegister(index), TIMES_8, 0)); + + // Record method pointer and action. + CpuRegister method = index; + __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset)); + // Use last two bits to encode trace method action. For MethodEntry it is 0 + // so no need to set the bits since they are 0 already. + if (instruction->IsMethodExitHook()) { + DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4)); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0); + static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1); + __ orq(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit))); + } + __ movq(Address(entry_addr, kMethodOffsetInBytes), CpuRegister(method)); + // Get the timestamp. rdtsc returns timestamp in RAX + RDX even in 64-bit architectures. + __ rdtsc(); + __ shlq(CpuRegister(RDX), Immediate(32)); + __ orq(CpuRegister(RAX), CpuRegister(RDX)); + __ movq(Address(entry_addr, kTimestampOffsetInBytes), CpuRegister(RAX)); __ Bind(slow_path->GetExitLabel()); } @@ -1651,6 +1700,10 @@ void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) { LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath); SetInForReturnValue(method_hook, locations); + // We use rdtsc to record the timestamp for method profiling. rdtsc returns + // two 32-bit values in EAX + EDX even on 64-bit architectures. + locations->AddTemp(Location::RegisterLocation(RAX)); + locations->AddTemp(Location::RegisterLocation(RDX)); } void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) { @@ -1949,8 +2002,9 @@ void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) { Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value)); } -void CodeGeneratorX86_64::MoveLocation( - Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) { +void CodeGeneratorX86_64::MoveLocation(Location dst, + Location src, + [[maybe_unused]] DataType::Type dst_type) { Move(dst, src); } @@ -2009,8 +2063,7 @@ void LocationsBuilderX86_64::VisitExit(HExit* exit) { exit->SetLocations(nullptr); } -void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { -} +void InstructionCodeGeneratorX86_64::VisitExit([[maybe_unused]] HExit* exit) {} template<class LabelType> void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond, @@ -2051,7 +2104,7 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) } else if (right.IsConstant()) { __ ucomiss(left.AsFpuRegister<XmmRegister>(), codegen_->LiteralFloatAddress( - right.GetConstant()->AsFloatConstant()->GetValue())); + right.GetConstant()->AsFloatConstant()->GetValue())); } else { DCHECK(right.IsStackSlot()); __ ucomiss(left.AsFpuRegister<XmmRegister>(), @@ -2065,7 +2118,7 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) } else if (right.IsConstant()) { __ ucomisd(left.AsFpuRegister<XmmRegister>(), codegen_->LiteralDoubleAddress( - right.GetConstant()->AsDoubleConstant()->GetValue())); + right.GetConstant()->AsDoubleConstant()->GetValue())); } else { DCHECK(right.IsDoubleStackSlot()); __ 
ucomisd(left.AsFpuRegister<XmmRegister>(), @@ -2657,7 +2710,7 @@ void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) { // Will be generated at use site. } @@ -2667,7 +2720,7 @@ void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) { // Will be generated at use site. } @@ -2677,7 +2730,7 @@ void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) { // Will be generated at use site. } @@ -2687,7 +2740,7 @@ void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) { locations->SetOut(Location::ConstantLocation(constant)); } -void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) { // Will be generated at use site. } @@ -2698,7 +2751,7 @@ void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) { } void InstructionCodeGeneratorX86_64::VisitDoubleConstant( - HDoubleConstant* constant ATTRIBUTE_UNUSED) { + [[maybe_unused]] HDoubleConstant* constant) { // Will be generated at use site. } @@ -2707,7 +2760,7 @@ void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructo } void InstructionCodeGeneratorX86_64::VisitConstructorFence( - HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { + [[maybe_unused]] HConstructorFence* constructor_fence) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } @@ -2723,7 +2776,7 @@ void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { ret->SetLocations(nullptr); } -void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) { codegen_->GenerateFrameExit(); } @@ -4972,7 +5025,7 @@ void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) { } void InstructionCodeGeneratorX86_64::VisitParameterValue( - HParameterValue* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HParameterValue* instruction) { // Nothing to do, the parameter is already at its location. } @@ -4983,7 +5036,7 @@ void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) { } void InstructionCodeGeneratorX86_64::VisitCurrentMethod( - HCurrentMethod* instruction ATTRIBUTE_UNUSED) { + [[maybe_unused]] HCurrentMethod* instruction) { // Nothing to do, the method is already at its location. 
} @@ -5062,7 +5115,7 @@ void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) { locations->SetOut(Location::Any()); } -void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitPhi([[maybe_unused]] HPhi* instruction) { LOG(FATAL) << "Unimplemented"; } @@ -5930,8 +5983,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { __ movsd(address, value.AsFpuRegister<XmmRegister>()); codegen_->MaybeRecordImplicitNullCheck(instruction); } else { - int64_t v = - bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); + int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); Address address_high = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t)); codegen_->MoveInt64ToAddress(address, address_high, v, instruction); @@ -6084,7 +6136,7 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, } } -void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderX86_64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) { LOG(FATAL) << "Unimplemented"; } @@ -6471,7 +6523,9 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { locations->SetInAt(0, Location::RequiresRegister()); } locations->SetOut(Location::RequiresRegister()); - if (load_kind == HLoadClass::LoadKind::kBssEntry) { + if (load_kind == HLoadClass::LoadKind::kBssEntry || + load_kind == HLoadClass::LoadKind::kBssEntryPublic || + load_kind == HLoadClass::LoadKind::kBssEntryPackage) { if (!gUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution and/or initialization to save everything. locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); @@ -6507,9 +6561,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); - const ReadBarrierOption read_barrier_option = cls->IsInBootImage() - ? kWithoutReadBarrier - : gCompilerReadBarrierOption; + const ReadBarrierOption read_barrier_option = + cls->IsInBootImage() ? kWithoutReadBarrier : GetCompilerReadBarrierOption(); bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -6704,7 +6757,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA /* no_rip= */ false); Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, GetCompilerReadBarrierOption()); // No need for memory fence, thanks to the x86-64 memory model. 
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load); codegen_->AddSlowPath(slow_path); @@ -6725,14 +6778,13 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA Label* fixup_label = codegen_->NewJitRootStringPatch( load->GetDexFile(), load->GetStringIndex(), load->GetString()); // /* GcRoot<mirror::String> */ out = *address - GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, gCompilerReadBarrierOption); + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, GetCompilerReadBarrierOption()); return; } default: break; } - // TODO: Re-add the compiler code to do string dex cache lookup again. // Custom calling convention: RAX serves as both input and output. __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_)); codegen_->InvokeRuntime(kQuickResolveString, @@ -6760,7 +6812,7 @@ void LocationsBuilderX86_64::VisitClearException(HClearException* clear) { new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } -void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitClearException([[maybe_unused]] HClearException* clear) { __ gs()->movl(GetExceptionTlsAddress(), Immediate(0)); } @@ -7112,7 +7164,6 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { } else { locations->SetInAt(1, Location::Any()); } - // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } @@ -7301,11 +7352,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - kWithoutReadBarrier); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset)); // Maybe poison the `cls` for direct comparison with memory. @@ -7830,12 +7881,12 @@ void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instructi __ Bind(slow_path->GetExitLabel()); } -void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void LocationsBuilderX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) { // Nothing to do, this should be removed during prepare for register allocator. 
LOG(FATAL) << "Unreachable"; } @@ -7930,13 +7981,13 @@ void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_ins __ jmp(temp_reg); } -void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { +void LocationsBuilderX86_64::VisitIntermediateAddress( + [[maybe_unused]] HIntermediateAddress* instruction) { LOG(FATAL) << "Unreachable"; } -void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction - ATTRIBUTE_UNUSED) { +void InstructionCodeGeneratorX86_64::VisitIntermediateAddress( + [[maybe_unused]] HIntermediateAddress* instruction) { LOG(FATAL) << "Unreachable"; } @@ -8037,9 +8088,9 @@ Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj, Location index, ScaleFactor scale, uint32_t data_offset) { - return index.IsConstant() ? - Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) : - Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset); + return index.IsConstant() + ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) + : Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset); } void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) { @@ -8119,7 +8170,7 @@ class JumpTableRIPFixup : public RIPFixup { const HPackedSwitch* switch_instr_; }; -void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { +void CodeGeneratorX86_64::Finalize() { // Generate the constant area if needed. X86_64Assembler* assembler = GetAssembler(); if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) { @@ -8137,7 +8188,7 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { } // And finish up. - CodeGenerator::Finalize(allocator); + CodeGenerator::Finalize(); } Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) { @@ -8217,7 +8268,7 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code, reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t; reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] = - dchecked_integral_cast<uint32_t>(address); + dchecked_integral_cast<uint32_t>(address); } void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index dff2e799e0..5a940c1466 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -162,16 +162,16 @@ class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention { Location GetFieldIndexLocation() const override { return Location::RegisterLocation(RDI); } - Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + Location GetReturnLocation([[maybe_unused]] DataType::Type type) const override { return Location::RegisterLocation(RAX); } - Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, bool is_instance) - const override { + Location GetSetValueLocation([[maybe_unused]] DataType::Type type, + bool is_instance) const override { return is_instance ? 
Location::RegisterLocation(RDX) : Location::RegisterLocation(RSI); } - Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override { + Location GetFpuLocation([[maybe_unused]] DataType::Type type) const override { return Location::FpuRegisterLocation(XMM0); } @@ -468,7 +468,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { void SetupBlockedRegisters() const override; void DumpCoreRegister(std::ostream& stream, int reg) const override; void DumpFloatingPointRegister(std::ostream& stream, int reg) const override; - void Finalize(CodeAllocator* allocator) override; + void Finalize() override; InstructionSet GetInstructionSet() const override { return InstructionSet::kX86_64; @@ -502,9 +502,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { block_labels_ = CommonInitializeLabels<Label>(); } - bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { - return false; - } + bool NeedsTwoRegisters([[maybe_unused]] DataType::Type type) const override { return false; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc index d759a16f48..33b5bd5169 100644 --- a/compiler/optimizing/code_sinking.cc +++ b/compiler/optimizing/code_sinking.cc @@ -16,6 +16,9 @@ #include "code_sinking.h" +#include <sstream> + +#include "android-base/logging.h" #include "base/arena_bit_vector.h" #include "base/array_ref.h" #include "base/bit_vector-inl.h" @@ -335,10 +338,6 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { processed_instructions.ClearAllBits(); ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable= */ false); post_dominated.ClearAllBits(); - ArenaBitVector instructions_that_can_move( - &allocator, number_of_instructions, /* expandable= */ false); - instructions_that_can_move.ClearAllBits(); - ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc)); // Step (1): Visit post order to get a subset of blocks post dominated by `end_block`. // TODO(ngeoffray): Getting the full set of post-dominated should be done by @@ -411,6 +410,13 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { HBasicBlock* common_dominator = finder.Get(); // Step (2): iterate over the worklist to find sinking candidates. + ArenaBitVector instructions_that_can_move( + &allocator, number_of_instructions, /* expandable= */ false); + instructions_that_can_move.ClearAllBits(); + ScopedArenaVector<ScopedArenaVector<HInstruction*>> instructions_to_move( + graph_->GetBlocks().size(), + ScopedArenaVector<HInstruction*>(allocator.Adapter(kArenaAllocMisc)), + allocator.Adapter(kArenaAllocMisc)); while (!worklist.empty()) { HInstruction* instruction = worklist.back(); if (processed_instructions.IsBitSet(instruction->GetId())) { @@ -467,7 +473,7 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { // Instruction is a candidate for being sunk. Mark it as such, remove it from the // work list, and add its inputs to the work list. 
instructions_that_can_move.SetBit(instruction->GetId()); - move_in_order.push_back(instruction); + instructions_to_move[instruction->GetBlock()->GetBlockId()].push_back(instruction); processed_instructions.SetBit(instruction->GetId()); worklist.pop_back(); AddInputs(instruction, processed_instructions, post_dominated, &worklist); @@ -493,14 +499,50 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { } } - // Make sure we process instructions in dominated order. This is required for heap - // stores. - std::sort(move_in_order.begin(), move_in_order.end(), [](HInstruction* a, HInstruction* b) { - return b->StrictlyDominates(a); - }); + // We want to process the instructions in reverse dominated order. This is required for heap + // stores. To guarantee this (including the transitivity of incomparability) we have some extra + // bookkeeping. + ScopedArenaVector<HInstruction*> instructions_to_move_sorted(allocator.Adapter(kArenaAllocMisc)); + for (HBasicBlock* block : graph_->GetPostOrder()) { + const int block_id = block->GetBlockId(); + + // Order the block itself first. + std::sort(instructions_to_move[block_id].begin(), + instructions_to_move[block_id].end(), + [&block](HInstruction* a, HInstruction* b) { + return block->GetInstructions().FoundBefore(b, a); + }); + + for (HInstruction* instruction : instructions_to_move[block_id]) { + instructions_to_move_sorted.push_back(instruction); + } + } + + if (kIsDebugBuild) { + // We should have ordered the instructions in reverse dominated order. This means that + // instructions shouldn't dominate instructions that come after it in the vector. + for (size_t i = 0; i < instructions_to_move_sorted.size(); ++i) { + for (size_t j = i + 1; j < instructions_to_move_sorted.size(); ++j) { + if (instructions_to_move_sorted[i]->StrictlyDominates(instructions_to_move_sorted[j])) { + std::stringstream ss; + graph_->Dump(ss, nullptr); + ss << "\n" + << "{"; + for (HInstruction* instr : instructions_to_move_sorted) { + ss << *instr << " in block: " << instr->GetBlock() << ", "; + } + ss << "}\n"; + ss << "i = " << i << " which is " << *instructions_to_move_sorted[i] + << "strictly dominates j = " << j << " which is " << *instructions_to_move_sorted[j] + << "\n"; + LOG(FATAL) << "Unexpected ordering of code sinking instructions: " << ss.str(); + } + } + } + } // Step (3): Try to move sinking candidates. - for (HInstruction* instruction : move_in_order) { + for (HInstruction* instruction : instructions_to_move_sorted) { HInstruction* position = nullptr; if (instruction->IsArraySet() || instruction->IsInstanceFieldSet() diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 2d9acc49b3..c72d3ea24a 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -733,8 +733,7 @@ TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) { move->AddMove(Location::StackSlot(8192), Location::StackSlot(0), DataType::Type::kInt32, nullptr); codegen.GetMoveResolver()->EmitNativeCode(move); - InternalCodeAllocator code_allocator; - codegen.Finalize(&code_allocator); + codegen.Finalize(); } #endif @@ -785,8 +784,7 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { nullptr); codegen.GetMoveResolver()->EmitNativeCode(move); - InternalCodeAllocator code_allocator; - codegen.Finalize(&code_allocator); + codegen.Finalize(); } // Check that ParallelMoveResolver works fine for ARM64 for both cases when SIMD is on and off. 
@@ -798,7 +796,7 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { codegen.Initialize(); - graph->SetHasSIMD(true); + graph->SetHasTraditionalSIMD(true); for (int i = 0; i < 2; i++) { HParallelMove* move = new (graph->GetAllocator()) HParallelMove(graph->GetAllocator()); move->AddMove(Location::SIMDStackSlot(0), @@ -818,11 +816,10 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { DataType::Type::kFloat64, nullptr); codegen.GetMoveResolver()->EmitNativeCode(move); - graph->SetHasSIMD(false); + graph->SetHasTraditionalSIMD(false); } - InternalCodeAllocator code_allocator; - codegen.Finalize(&code_allocator); + codegen.Finalize(); } // Check that ART ISA Features are propagated to VIXL for arm64 (using cortex-a75 as example). @@ -867,7 +864,7 @@ TEST_F(CodegenTest, ARM64FrameSizeSIMD) { arm64::CodeGeneratorARM64 codegen(graph, *compiler_options); codegen.Initialize(); - graph->SetHasSIMD(true); + graph->SetHasTraditionalSIMD(true); DCHECK_EQ(arm64::callee_saved_fp_registers.GetCount(), 8); vixl::aarch64::CPURegList reg_list = arm64::callee_saved_fp_registers; @@ -887,7 +884,8 @@ TEST_F(CodegenTest, ARM64FrameSizeNoSIMD) { arm64::CodeGeneratorARM64 codegen(graph, *compiler_options); codegen.Initialize(); - graph->SetHasSIMD(false); + graph->SetHasTraditionalSIMD(false); + graph->SetHasPredicatedSIMD(false); DCHECK_EQ(arm64::callee_saved_fp_registers.GetCount(), 8); vixl::aarch64::CPURegList reg_list = arm64::callee_saved_fp_registers; diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index 7af9d0f44c..a8425c9915 100644 --- a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -103,8 +103,8 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL { blocked_core_registers_[arm::R7] = false; } - void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED, - Location temp_loc ATTRIBUTE_UNUSED) override { + void MaybeGenerateMarkingRegisterCheck([[maybe_unused]] int code, + [[maybe_unused]] Location temp_loc) override { // When turned on, the marking register checks in // CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expects the // Thread Register and the Marking Register to be set to @@ -135,8 +135,8 @@ class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 { TestCodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options) : arm64::CodeGeneratorARM64(graph, compiler_options) {} - void MaybeGenerateMarkingRegisterCheck(int codem ATTRIBUTE_UNUSED, - Location temp_loc ATTRIBUTE_UNUSED) override { + void MaybeGenerateMarkingRegisterCheck([[maybe_unused]] int codem, + [[maybe_unused]] Location temp_loc) override { // When turned on, the marking register checks in // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck expect the // Thread Register and the Marking Register to be set to @@ -167,28 +167,6 @@ class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 { }; #endif -class InternalCodeAllocator : public CodeAllocator { - public: - InternalCodeAllocator() : size_(0) { } - - uint8_t* Allocate(size_t size) override { - size_ = size; - memory_.reset(new uint8_t[size]); - return memory_.get(); - } - - size_t GetSize() const { return size_; } - ArrayRef<const uint8_t> GetMemory() const override { - return ArrayRef<const uint8_t>(memory_.get(), size_); - } - - private: - size_t size_; - std::unique_ptr<uint8_t[]> memory_; - - DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); -}; - static bool CanExecuteOnHardware(InstructionSet target_isa) { return 
(target_isa == kRuntimeISA) // Handle the special case of ARM, with two instructions sets (ARM32 and Thumb-2). @@ -247,8 +225,7 @@ static void VerifyGeneratedCode(InstructionSet target_isa, } template <typename Expected> -static void Run(const InternalCodeAllocator& allocator, - const CodeGenerator& codegen, +static void Run(const CodeGenerator& codegen, bool has_result, Expected expected) { InstructionSet target_isa = codegen.GetInstructionSet(); @@ -260,7 +237,7 @@ static void Run(const InternalCodeAllocator& allocator, }; CodeHolder code_holder; const void* method_code = - code_holder.MakeExecutable(allocator.GetMemory(), ArrayRef<const uint8_t>(), target_isa); + code_holder.MakeExecutable(codegen.GetCode(), ArrayRef<const uint8_t>(), target_isa); using fptr = Expected (*)(); fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(method_code)); @@ -294,9 +271,8 @@ static void RunCodeNoCheck(CodeGenerator* codegen, register_allocator->AllocateRegisters(); } hook_before_codegen(graph); - InternalCodeAllocator allocator; - codegen->Compile(&allocator); - Run(allocator, *codegen, has_result, expected); + codegen->Compile(); + Run(*codegen, has_result, expected); } template <typename Expected> diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index 20b0e38af5..e2ef8d52f2 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -311,10 +311,8 @@ inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* } } -inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, - HInstruction* instr) { - if (constant->IsConstant() - && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { +inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { + if (constant->IsConstant() && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { return Location::ConstantLocation(constant); } diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index 06d19e3f29..e20d9e83e6 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -37,6 +37,13 @@ class HConstantFoldingVisitor final : public HGraphDelegateVisitor { void VisitUnaryOperation(HUnaryOperation* inst) override; void VisitBinaryOperation(HBinaryOperation* inst) override; + // Tries to replace constants in binary operations like: + // * BinaryOp(Select(false_constant, true_constant, condition), other_constant), or + // * BinaryOp(other_constant, Select(false_constant, true_constant, condition)) + // with consolidated constants. For example, Add(Select(10, 20, condition), 5) can be replaced + // with Select(15, 25, condition). + bool TryRemoveBinaryOperationViaSelect(HBinaryOperation* inst); + void VisitArrayLength(HArrayLength* inst) override; void VisitDivZeroCheck(HDivZeroCheck* inst) override; void VisitIf(HIf* inst) override; @@ -113,9 +120,69 @@ void HConstantFoldingVisitor::VisitUnaryOperation(HUnaryOperation* inst) { if (constant != nullptr) { inst->ReplaceWith(constant); inst->GetBlock()->RemoveInstruction(inst); + } else if (inst->InputAt(0)->IsSelect() && inst->InputAt(0)->HasOnlyOneNonEnvironmentUse()) { + // Try to replace the select's inputs in Select+UnaryOperation cases. We can do this if both + // inputs to the select are constants, and this is the only use of the select. 
+ HSelect* select = inst->InputAt(0)->AsSelect(); + HConstant* false_constant = inst->TryStaticEvaluation(select->GetFalseValue()); + if (false_constant == nullptr) { + return; + } + HConstant* true_constant = inst->TryStaticEvaluation(select->GetTrueValue()); + if (true_constant == nullptr) { + return; + } + DCHECK_EQ(select->InputAt(0), select->GetFalseValue()); + DCHECK_EQ(select->InputAt(1), select->GetTrueValue()); + select->ReplaceInput(false_constant, 0); + select->ReplaceInput(true_constant, 1); + select->UpdateType(); + inst->ReplaceWith(select); + inst->GetBlock()->RemoveInstruction(inst); } } +bool HConstantFoldingVisitor::TryRemoveBinaryOperationViaSelect(HBinaryOperation* inst) { + if (inst->GetLeft()->IsSelect() == inst->GetRight()->IsSelect()) { + // If both of them are constants, VisitBinaryOperation already tried the static evaluation. If + // both of them are selects, then we can't simplify. + // TODO(solanes): Technically, if both of them are selects we could simplify iff both select's + // conditions are equal e.g. Add(Select(1, 2, cond), Select(3, 4, cond)) could be replaced with + // Select(4, 6, cond). This seems very unlikely to happen so we don't implement it. + return false; + } + + const bool left_is_select = inst->GetLeft()->IsSelect(); + HSelect* select = left_is_select ? inst->GetLeft()->AsSelect() : inst->GetRight()->AsSelect(); + HInstruction* maybe_constant = left_is_select ? inst->GetRight() : inst->GetLeft(); + + if (select->HasOnlyOneNonEnvironmentUse()) { + // Try to replace the select's inputs in Select+BinaryOperation. We can do this if both + // inputs to the select are constants, and this is the only use of the select. + HConstant* false_constant = + inst->TryStaticEvaluation(left_is_select ? select->GetFalseValue() : maybe_constant, + left_is_select ? maybe_constant : select->GetFalseValue()); + if (false_constant == nullptr) { + return false; + } + HConstant* true_constant = + inst->TryStaticEvaluation(left_is_select ? select->GetTrueValue() : maybe_constant, + left_is_select ? maybe_constant : select->GetTrueValue()); + if (true_constant == nullptr) { + return false; + } + DCHECK_EQ(select->InputAt(0), select->GetFalseValue()); + DCHECK_EQ(select->InputAt(1), select->GetTrueValue()); + select->ReplaceInput(false_constant, 0); + select->ReplaceInput(true_constant, 1); + select->UpdateType(); + inst->ReplaceWith(select); + inst->GetBlock()->RemoveInstruction(inst); + return true; + } + return false; +} + void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) { // Constant folding: replace `op(a, b)' with a constant at // compile time if `a' and `b' are both constants. @@ -123,6 +190,8 @@ void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) { if (constant != nullptr) { inst->ReplaceWith(constant); inst->GetBlock()->RemoveInstruction(inst); + } else if (TryRemoveBinaryOperationViaSelect(inst)) { + // Already replaced inside TryRemoveBinaryOperationViaSelect. } else { InstructionWithAbsorbingInputSimplifier simplifier(GetGraph()); inst->Accept(&simplifier); @@ -299,6 +368,25 @@ void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) { if (constant != nullptr) { inst->ReplaceWith(constant); inst->GetBlock()->RemoveInstruction(inst); + } else if (inst->InputAt(0)->IsSelect() && inst->InputAt(0)->HasOnlyOneNonEnvironmentUse()) { + // Try to replace the select's inputs in Select+TypeConversion. We can do this if both + // inputs to the select are constants, and this is the only use of the select. 
+ HSelect* select = inst->InputAt(0)->AsSelect(); + HConstant* false_constant = inst->TryStaticEvaluation(select->GetFalseValue()); + if (false_constant == nullptr) { + return; + } + HConstant* true_constant = inst->TryStaticEvaluation(select->GetTrueValue()); + if (true_constant == nullptr) { + return; + } + DCHECK_EQ(select->InputAt(0), select->GetFalseValue()); + DCHECK_EQ(select->InputAt(1), select->GetTrueValue()); + select->ReplaceInput(false_constant, 0); + select->ReplaceInput(true_constant, 1); + select->UpdateType(); + inst->ReplaceWith(select); + inst->GetBlock()->RemoveInstruction(inst); } } @@ -583,7 +671,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitRem(HRem* instruction) { block->RemoveInstruction(instruction); } - HConstant* cst_right = instruction->GetRight()->AsConstant(); + HConstant* cst_right = instruction->GetRight()->AsConstantOrNull(); if (((cst_right != nullptr) && (cst_right->IsOne() || cst_right->IsMinusOne())) || (instruction->GetLeft() == instruction->GetRight())) { diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc index d9b7652f32..48635cfd15 100644 --- a/compiler/optimizing/constructor_fence_redundancy_elimination.cc +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc @@ -78,7 +78,7 @@ class CFREVisitor final : public HGraphVisitor { VisitSetLocation(instruction, value); } - void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) override { + void VisitDeoptimize([[maybe_unused]] HDeoptimize* instruction) override { // Pessimize: Merge all fences. MergeCandidateFences(); } @@ -151,7 +151,7 @@ class CFREVisitor final : public HGraphVisitor { } } - void VisitSetLocation(HInstruction* inst ATTRIBUTE_UNUSED, HInstruction* store_input) { + void VisitSetLocation([[maybe_unused]] HInstruction* inst, HInstruction* store_input) { // An object is considered "published" if it's stored onto the heap. // Sidenote: A later "LSE" pass can still remove the fence if it proves the // object doesn't actually escape. 
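// Illustration only: a self-contained check of the Select folding rule added in the
// constant_folding.cc hunks above, with plain C++ ternaries standing in for HSelect
// (select_folding_sketch and its helpers are made-up names, not ART code). For either
// value of the condition, Add(Select(10, 20, cond), 5) behaves like Select(15, 25, cond),
// and the same shape holds when a unary operation or type conversion is applied to both arms.
#include <cassert>

namespace select_folding_sketch {

inline int SelectThenAdd(bool cond, int false_val, int true_val, int other) {
  return (cond ? true_val : false_val) + other;            // BinaryOp(Select(...), constant)
}

inline int FoldedSelect(bool cond, int false_val, int true_val, int other) {
  return cond ? (true_val + other) : (false_val + other);  // Select(folded, folded, cond)
}

inline void Check() {
  for (bool cond : {false, true}) {
    assert(SelectThenAdd(cond, 10, 20, 5) == FoldedSelect(cond, 10, 20, 5));
  }
}

}  // namespace select_folding_sketch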
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index cf49e39849..8e6b6db236 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -24,6 +24,7 @@ #include "base/scoped_arena_containers.h" #include "base/stl_util.h" #include "optimizing/nodes.h" +#include "optimizing/nodes_vector.h" #include "ssa_phi_elimination.h" namespace art HIDDEN { @@ -842,7 +843,8 @@ void HDeadCodeElimination::RemoveDeadInstructions() { void HDeadCodeElimination::UpdateGraphFlags() { bool has_monitor_operations = false; - bool has_simd = false; + bool has_traditional_simd = false; + bool has_predicated_simd = false; bool has_bounds_checks = false; bool has_always_throwing_invokes = false; @@ -852,7 +854,12 @@ void HDeadCodeElimination::UpdateGraphFlags() { if (instruction->IsMonitorOperation()) { has_monitor_operations = true; } else if (instruction->IsVecOperation()) { - has_simd = true; + HVecOperation* vec_instruction = instruction->AsVecOperation(); + if (vec_instruction->IsPredicated()) { + has_predicated_simd = true; + } else { + has_traditional_simd = true; + } } else if (instruction->IsBoundsCheck()) { has_bounds_checks = true; } else if (instruction->IsInvoke() && instruction->AsInvoke()->AlwaysThrows()) { @@ -862,7 +869,8 @@ void HDeadCodeElimination::UpdateGraphFlags() { } graph_->SetHasMonitorOperations(has_monitor_operations); - graph_->SetHasSIMD(has_simd); + graph_->SetHasTraditionalSIMD(has_traditional_simd); + graph_->SetHasPredicatedSIMD(has_predicated_simd); graph_->SetHasBoundsChecks(has_bounds_checks); graph_->SetHasAlwaysThrowingInvokes(has_always_throwing_invokes); } diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index 190b362145..31ba3fe98a 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -168,52 +168,68 @@ void GraphChecker::CheckGraphFlags() { void GraphChecker::VisitBasicBlock(HBasicBlock* block) { current_block_ = block; - // Use local allocator for allocating memory. - ScopedArenaAllocator allocator(GetGraph()->GetArenaStack()); - - // Check consistency with respect to predecessors of `block`. - // Note: Counting duplicates with a sorted vector uses up to 6x less memory - // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. - ScopedArenaVector<HBasicBlock*> sorted_predecessors(allocator.Adapter(kArenaAllocGraphChecker)); - sorted_predecessors.assign(block->GetPredecessors().begin(), block->GetPredecessors().end()); - std::sort(sorted_predecessors.begin(), sorted_predecessors.end()); - for (auto it = sorted_predecessors.begin(), end = sorted_predecessors.end(); it != end; ) { - HBasicBlock* p = *it++; - size_t p_count_in_block_predecessors = 1u; - for (; it != end && *it == p; ++it) { - ++p_count_in_block_predecessors; - } - size_t block_count_in_p_successors = - std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block); - if (p_count_in_block_predecessors != block_count_in_p_successors) { - AddError(StringPrintf( - "Block %d lists %zu occurrences of block %d in its predecessors, whereas " - "block %d lists %zu occurrences of block %d in its successors.", - block->GetBlockId(), p_count_in_block_predecessors, p->GetBlockId(), - p->GetBlockId(), block_count_in_p_successors, block->GetBlockId())); - } - } + { + // Use local allocator for allocating memory. We use C++ scopes (i.e. 
`{}`) to reclaim the + // memory as soon as possible, and to end the scope of this `ScopedArenaAllocator`. + ScopedArenaAllocator allocator(GetGraph()->GetArenaStack()); - // Check consistency with respect to successors of `block`. - // Note: Counting duplicates with a sorted vector uses up to 6x less memory - // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. - ScopedArenaVector<HBasicBlock*> sorted_successors(allocator.Adapter(kArenaAllocGraphChecker)); - sorted_successors.assign(block->GetSuccessors().begin(), block->GetSuccessors().end()); - std::sort(sorted_successors.begin(), sorted_successors.end()); - for (auto it = sorted_successors.begin(), end = sorted_successors.end(); it != end; ) { - HBasicBlock* s = *it++; - size_t s_count_in_block_successors = 1u; - for (; it != end && *it == s; ++it) { - ++s_count_in_block_successors; + { + // Check consistency with respect to predecessors of `block`. + // Note: Counting duplicates with a sorted vector uses up to 6x less memory + // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. + ScopedArenaVector<HBasicBlock*> sorted_predecessors( + allocator.Adapter(kArenaAllocGraphChecker)); + sorted_predecessors.assign(block->GetPredecessors().begin(), block->GetPredecessors().end()); + std::sort(sorted_predecessors.begin(), sorted_predecessors.end()); + for (auto it = sorted_predecessors.begin(), end = sorted_predecessors.end(); it != end;) { + HBasicBlock* p = *it++; + size_t p_count_in_block_predecessors = 1u; + for (; it != end && *it == p; ++it) { + ++p_count_in_block_predecessors; + } + size_t block_count_in_p_successors = + std::count(p->GetSuccessors().begin(), p->GetSuccessors().end(), block); + if (p_count_in_block_predecessors != block_count_in_p_successors) { + AddError(StringPrintf( + "Block %d lists %zu occurrences of block %d in its predecessors, whereas " + "block %d lists %zu occurrences of block %d in its successors.", + block->GetBlockId(), + p_count_in_block_predecessors, + p->GetBlockId(), + p->GetBlockId(), + block_count_in_p_successors, + block->GetBlockId())); + } + } } - size_t block_count_in_s_predecessors = - std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block); - if (s_count_in_block_successors != block_count_in_s_predecessors) { - AddError(StringPrintf( - "Block %d lists %zu occurrences of block %d in its successors, whereas " - "block %d lists %zu occurrences of block %d in its predecessors.", - block->GetBlockId(), s_count_in_block_successors, s->GetBlockId(), - s->GetBlockId(), block_count_in_s_predecessors, block->GetBlockId())); + + { + // Check consistency with respect to successors of `block`. + // Note: Counting duplicates with a sorted vector uses up to 6x less memory + // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. 
+ ScopedArenaVector<HBasicBlock*> sorted_successors(allocator.Adapter(kArenaAllocGraphChecker)); + sorted_successors.assign(block->GetSuccessors().begin(), block->GetSuccessors().end()); + std::sort(sorted_successors.begin(), sorted_successors.end()); + for (auto it = sorted_successors.begin(), end = sorted_successors.end(); it != end;) { + HBasicBlock* s = *it++; + size_t s_count_in_block_successors = 1u; + for (; it != end && *it == s; ++it) { + ++s_count_in_block_successors; + } + size_t block_count_in_s_predecessors = + std::count(s->GetPredecessors().begin(), s->GetPredecessors().end(), block); + if (s_count_in_block_successors != block_count_in_s_predecessors) { + AddError( + StringPrintf("Block %d lists %zu occurrences of block %d in its successors, whereas " + "block %d lists %zu occurrences of block %d in its predecessors.", + block->GetBlockId(), + s_count_in_block_successors, + s->GetBlockId(), + s->GetBlockId(), + block_count_in_s_predecessors, + block->GetBlockId())); + } + } } } @@ -587,21 +603,38 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } // Ensure 'instruction' has pointers to its inputs' use entries. - auto&& input_records = instruction->GetInputRecords(); - for (size_t i = 0; i < input_records.size(); ++i) { - const HUserRecord<HInstruction*>& input_record = input_records[i]; - HInstruction* input = input_record.GetInstruction(); - if ((input_record.GetBeforeUseNode() == input->GetUses().end()) || - (input_record.GetUseNode() == input->GetUses().end()) || - !input->GetUses().ContainsNode(*input_record.GetUseNode()) || - (input_record.GetUseNode()->GetIndex() != i)) { - AddError(StringPrintf("Instruction %s:%d has an invalid iterator before use entry " - "at input %u (%s:%d).", - instruction->DebugName(), - instruction->GetId(), - static_cast<unsigned>(i), - input->DebugName(), - input->GetId())); + { + auto&& input_records = instruction->GetInputRecords(); + for (size_t i = 0; i < input_records.size(); ++i) { + const HUserRecord<HInstruction*>& input_record = input_records[i]; + HInstruction* input = input_record.GetInstruction(); + + // Populate bookkeeping, if needed. See comment in graph_checker.h for uses_per_instruction_. 
+ auto it = uses_per_instruction_.find(input->GetId()); + if (it == uses_per_instruction_.end()) { + it = uses_per_instruction_ + .insert({input->GetId(), + ScopedArenaSet<const art::HUseListNode<art::HInstruction*>*>( + allocator_.Adapter(kArenaAllocGraphChecker))}) + .first; + for (auto&& use : input->GetUses()) { + it->second.insert(std::addressof(use)); + } + } + + if ((input_record.GetBeforeUseNode() == input->GetUses().end()) || + (input_record.GetUseNode() == input->GetUses().end()) || + (it->second.find(std::addressof(*input_record.GetUseNode())) == it->second.end()) || + (input_record.GetUseNode()->GetIndex() != i)) { + AddError( + StringPrintf("Instruction %s:%d has an invalid iterator before use entry " + "at input %u (%s:%d).", + instruction->DebugName(), + instruction->GetId(), + static_cast<unsigned>(i), + input->DebugName(), + input->GetId())); + } } } @@ -944,8 +977,7 @@ static bool IsSameSizeConstant(const HInstruction* insn1, const HInstruction* in static bool IsConstantEquivalent(const HInstruction* insn1, const HInstruction* insn2, BitVector* visited) { - if (insn1->IsPhi() && - insn1->AsPhi()->IsVRegEquivalentOf(insn2)) { + if (insn1->IsPhi() && insn1->AsPhi()->IsVRegEquivalentOf(insn2)) { HConstInputsRef insn1_inputs = insn1->GetInputs(); HConstInputsRef insn2_inputs = insn2->GetInputs(); if (insn1_inputs.size() != insn2_inputs.size()) { diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index d6644f3b50..aff2358411 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -22,7 +22,7 @@ #include "base/arena_bit_vector.h" #include "base/bit_vector-inl.h" #include "base/macros.h" -#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "nodes.h" namespace art HIDDEN { @@ -35,12 +35,13 @@ class GraphChecker : public HGraphDelegateVisitor { explicit GraphChecker(HGraph* graph, CodeGenerator* codegen = nullptr, const char* dump_prefix = "art::GraphChecker: ") - : HGraphDelegateVisitor(graph), - errors_(graph->GetAllocator()->Adapter(kArenaAllocGraphChecker)), - dump_prefix_(dump_prefix), - allocator_(graph->GetArenaStack()), - seen_ids_(&allocator_, graph->GetCurrentInstructionId(), false, kArenaAllocGraphChecker), - codegen_(codegen) { + : HGraphDelegateVisitor(graph), + errors_(graph->GetAllocator()->Adapter(kArenaAllocGraphChecker)), + dump_prefix_(dump_prefix), + allocator_(graph->GetArenaStack()), + seen_ids_(&allocator_, graph->GetCurrentInstructionId(), false, kArenaAllocGraphChecker), + uses_per_instruction_(allocator_.Adapter(kArenaAllocGraphChecker)), + codegen_(codegen) { seen_ids_.ClearAllBits(); } @@ -107,7 +108,7 @@ class GraphChecker : public HGraphDelegateVisitor { } } - protected: + private: // Report a new error. void AddError(const std::string& error) { errors_.push_back(error); @@ -118,7 +119,6 @@ class GraphChecker : public HGraphDelegateVisitor { // Errors encountered while checking the graph. ArenaVector<std::string> errors_; - private: void VisitReversePostOrder(); // Checks that the graph's flags are set correctly. @@ -129,6 +129,13 @@ class GraphChecker : public HGraphDelegateVisitor { ScopedArenaAllocator allocator_; ArenaBitVector seen_ids_; + // As part of VisitInstruction, we verify that the instruction's input_record is present in the + // corresponding input's GetUses. If an instruction is used in many places (e.g. 200K+ uses), the + // linear search through GetUses is too slow. 
We can use bookkeeping to search in a set, instead + // of a list. + ScopedArenaSafeMap<int, ScopedArenaSet<const art::HUseListNode<art::HInstruction*>*>> + uses_per_instruction_; + // Used to access target information. CodeGenerator* codegen_; diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 73bdd1e223..bd33fde907 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -610,6 +610,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } void VisitVecMemoryOperation(HVecMemoryOperation* vec_mem_operation) override { + VisitVecOperation(vec_mem_operation); StartAttributeStream("alignment") << vec_mem_operation->GetAlignment().ToString(); } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 5a4478dc14..91be79f8ec 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -180,7 +180,7 @@ bool HInliner::Run() { for (HBasicBlock* block : blocks) { for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) { HInstruction* next = instruction->GetNext(); - HInvoke* call = instruction->AsInvoke(); + HInvoke* call = instruction->AsInvokeOrNull(); // As long as the call is not intrinsified, it is worth trying to inline. if (call != nullptr && !codegen_->IsImplementedIntrinsic(call)) { if (honor_noinline_directives) { @@ -702,12 +702,14 @@ HInliner::InlineCacheType HInliner::GetInlineCacheAOT( // Walk over the class descriptors and look up the actual classes. // If we cannot find a type we return kInlineCacheMissingTypes. ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); + Thread* self = Thread::Current(); for (const dex::TypeIndex& type_index : dex_pc_data.classes) { const DexFile* dex_file = caller_compilation_unit_.GetDexFile(); const char* descriptor = pci->GetTypeDescriptor(dex_file, type_index); - ObjPtr<mirror::ClassLoader> class_loader = caller_compilation_unit_.GetClassLoader().Get(); - ObjPtr<mirror::Class> clazz = class_linker->LookupResolvedType(descriptor, class_loader); + ObjPtr<mirror::Class> clazz = + class_linker->FindClass(self, descriptor, caller_compilation_unit_.GetClassLoader()); if (clazz == nullptr) { + self->ClearException(); // Clean up the exception left by type resolution. 
VLOG(compiler) << "Could not find class from inline cache in AOT mode " << invoke_instruction->GetMethodReference().PrettyMethod() << " : " diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index fee9091145..fd599f789e 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -1364,8 +1364,7 @@ bool HInstructionBuilder::BuildInvokePolymorphic(uint32_t dex_pc, method_reference, resolved_method, resolved_method_reference, - proto_idx, - !graph_->IsDebuggable()); + proto_idx); if (!HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false)) { return false; } @@ -2365,9 +2364,9 @@ void HInstructionBuilder::BuildCheckedDivRem(uint16_t out_vreg, second = LoadLocal(second_vreg_or_constant, type); } - if (!second_is_constant - || (type == DataType::Type::kInt32 && second->AsIntConstant()->GetValue() == 0) - || (type == DataType::Type::kInt64 && second->AsLongConstant()->GetValue() == 0)) { + if (!second_is_constant || + (type == DataType::Type::kInt32 && second->AsIntConstant()->GetValue() == 0) || + (type == DataType::Type::kInt64 && second->AsLongConstant()->GetValue() == 0)) { second = new (allocator_) HDivZeroCheck(second, dex_pc); AppendInstruction(second); } diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 0c2fd5de56..0e2a62226f 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -1050,51 +1050,60 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { HInstruction* b = condition->InputAt(1); DataType::Type t_type = true_value->GetType(); DataType::Type f_type = false_value->GetType(); - // Here we have a <cmp> b ? true_value : false_value. - // Test if both values are compatible integral types (resulting MIN/MAX/ABS - // type will be int or long, like the condition). Replacements are general, - // but assume conditions prefer constants on the right. if (DataType::IsIntegralType(t_type) && DataType::Kind(t_type) == DataType::Kind(f_type)) { - // Allow a < 100 ? max(a, -100) : .. - // or a > -100 ? min(a, 100) : .. - // to use min/max instead of a to detect nested min/max expressions. - HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value); - if (new_a != nullptr) { - a = new_a; - } - // Try to replace typical integral MIN/MAX/ABS constructs. - if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) && - ((a == true_value && b == false_value) || - (b == true_value && a == false_value))) { - // Found a < b ? a : b (MIN) or a < b ? b : a (MAX) - // or a > b ? a : b (MAX) or a > b ? b : a (MIN). - bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value); - replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min); - } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) || - ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) { - bool negLeft = (cmp == kCondLT || cmp == kCondLE); - HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0); - HInstruction* not_negated = negLeft ? false_value : true_value; - if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) { - // Found a < 0 ? -a : a - // or a > 0 ? a : -a - // which can be replaced by ABS(a). 
- replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select); + if (cmp == kCondEQ || cmp == kCondNE) { + // Turns + // * Select[a, b, EQ(a,b)] / Select[a, b, EQ(b,a)] into a + // * Select[a, b, NE(a,b)] / Select[a, b, NE(b,a)] into b + // Note that the order in EQ/NE is irrelevant. + if ((a == true_value && b == false_value) || (a == false_value && b == true_value)) { + replace_with = cmp == kCondEQ ? false_value : true_value; + } + } else { + // Test if both values are compatible integral types (resulting MIN/MAX/ABS + // type will be int or long, like the condition). Replacements are general, + // but assume conditions prefer constants on the right. + + // Allow a < 100 ? max(a, -100) : .. + // or a > -100 ? min(a, 100) : .. + // to use min/max instead of a to detect nested min/max expressions. + HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value); + if (new_a != nullptr) { + a = new_a; } - } else if (true_value->IsSub() && false_value->IsSub()) { - HInstruction* true_sub1 = true_value->InputAt(0); - HInstruction* true_sub2 = true_value->InputAt(1); - HInstruction* false_sub1 = false_value->InputAt(0); - HInstruction* false_sub2 = false_value->InputAt(1); - if ((((cmp == kCondGT || cmp == kCondGE) && - (a == true_sub1 && b == true_sub2 && a == false_sub2 && b == false_sub1)) || - ((cmp == kCondLT || cmp == kCondLE) && - (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) && - AreLowerPrecisionArgs(t_type, a, b)) { - // Found a > b ? a - b : b - a - // or a < b ? b - a : a - b - // which can be replaced by ABS(a - b) for lower precision operands a, b. - replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); + // Try to replace typical integral MIN/MAX/ABS constructs. + if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) && + ((a == true_value && b == false_value) || (b == true_value && a == false_value))) { + // Found a < b ? a : b (MIN) or a < b ? b : a (MAX) + // or a > b ? a : b (MAX) or a > b ? b : a (MIN). + bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value); + replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min); + } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) || + ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) { + bool negLeft = (cmp == kCondLT || cmp == kCondLE); + HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0); + HInstruction* not_negated = negLeft ? false_value : true_value; + if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) { + // Found a < 0 ? -a : a + // or a > 0 ? a : -a + // which can be replaced by ABS(a). + replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select); + } + } else if (true_value->IsSub() && false_value->IsSub()) { + HInstruction* true_sub1 = true_value->InputAt(0); + HInstruction* true_sub2 = true_value->InputAt(1); + HInstruction* false_sub1 = false_value->InputAt(0); + HInstruction* false_sub2 = false_value->InputAt(1); + if ((((cmp == kCondGT || cmp == kCondGE) && + (a == true_sub1 && b == true_sub2 && a == false_sub2 && b == false_sub1)) || + ((cmp == kCondLT || cmp == kCondLE) && + (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) && + AreLowerPrecisionArgs(t_type, a, b)) { + // Found a > b ? a - b : b - a + // or a < b ? b - a : a - b + // which can be replaced by ABS(a - b) for lower precision operands a, b. 
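// Illustration only: the EQ/NE select identity exploited by the new kCondEQ/kCondNE branch
// above, checked with plain C++ ternaries standing in for HSelect (select_eq_ne_sketch is a
// made-up name, not ART code). Whenever the select's two values are exactly the two operands
// of its EQ/NE condition, the result is always one fixed operand, so both the comparison and
// the select can be removed.
#include <cassert>

namespace select_eq_ne_sketch {

inline void Check(int a, int b) {
  assert(((a == b) ? a : b) == b);  // equal: both arms coincide; not equal: picks b
  assert(((a == b) ? b : a) == a);
  assert(((a != b) ? a : b) == a);  // equal: both arms coincide; not equal: picks a
  assert(((a != b) ? b : a) == b);
}

}  // namespace select_eq_ne_sketch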
+ replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); + } } } } @@ -1456,24 +1465,26 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { } } - HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg(); - if (left_is_neg != right_is_neg && neg->HasOnlyOneNonEnvironmentUse()) { - // Replace code looking like - // NEG tmp, b - // ADD dst, a, tmp - // with - // SUB dst, a, b - // We do not perform the optimization if the input negation has environment - // uses or multiple non-environment uses as it could lead to worse code. In - // particular, we do not want the live range of `b` to be extended if we are - // not sure the initial 'NEG' instruction can be removed. - HInstruction* other = left_is_neg ? right : left; - HSub* sub = - new(GetGraph()->GetAllocator()) HSub(instruction->GetType(), other, neg->GetInput()); - instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub); - RecordSimplification(); - neg->GetBlock()->RemoveInstruction(neg); - return; + if (left_is_neg != right_is_neg) { + HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg(); + if (neg->HasOnlyOneNonEnvironmentUse()) { + // Replace code looking like + // NEG tmp, b + // ADD dst, a, tmp + // with + // SUB dst, a, b + // We do not perform the optimization if the input negation has environment + // uses or multiple non-environment uses as it could lead to worse code. In + // particular, we do not want the live range of `b` to be extended if we are + // not sure the initial 'NEG' instruction can be removed. + HInstruction* other = left_is_neg ? right : left; + HSub* sub = + new(GetGraph()->GetAllocator()) HSub(instruction->GetType(), other, neg->GetInput()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub); + RecordSimplification(); + neg->GetBlock()->RemoveInstruction(neg); + return; + } } if (TryReplaceWithRotate(instruction)) { @@ -1676,7 +1687,7 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) { HInstruction* input_two = condition->InputAt(1); HLoadClass* load_class = input_one->IsLoadClass() ? input_one->AsLoadClass() - : input_two->AsLoadClass(); + : input_two->AsLoadClassOrNull(); if (load_class == nullptr) { return false; } @@ -1688,8 +1699,8 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) { } HInstanceFieldGet* field_get = (load_class == input_one) - ? input_two->AsInstanceFieldGet() - : input_one->AsInstanceFieldGet(); + ? input_two->AsInstanceFieldGetOrNull() + : input_one->AsInstanceFieldGetOrNull(); if (field_get == nullptr) { return false; } @@ -2240,6 +2251,7 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { } if (left->IsAdd()) { + // Cases (x + y) - y = x, and (x + y) - x = y. // Replace code patterns looking like // ADD dst1, x, y ADD dst1, x, y // SUB dst2, dst1, y SUB dst2, dst1, x @@ -2248,14 +2260,75 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { // SUB instruction is not needed in this case, we may use // one of inputs of ADD instead. // It is applicable to integral types only. 
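// Illustration only: the algebraic identities behind the Add/Sub rewrites in this hunk,
// written out over uint32_t so the wrap-around arithmetic matches the integral-only
// restriction noted above (sub_identities_sketch is a made-up name, not ART code).
#include <cassert>
#include <cstdint>

namespace sub_identities_sketch {

inline void Check(uint32_t x, uint32_t y) {
  assert((x + y) - y == x);       // Sub(Add(x, y), y)  -> x
  assert((x + y) - x == y);       // Sub(Add(x, y), x)  -> y
  assert(y - (x + y) == -x);      // Sub(y, Add(x, y))  -> Neg(x)
  assert(x - (x + y) == -y);      // Sub(x, Add(x, y))  -> Neg(y)
  assert((x - y) - x == -y);      // Sub(Sub(x, y), x)  -> Neg(y)
  assert(x - (x - y) == y);       // Sub(x, Sub(x, y))  -> y
}

}  // namespace sub_identities_sketch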
+ HAdd* add = left->AsAdd(); DCHECK(DataType::IsIntegralType(type)); - if (left->InputAt(1) == right) { - instruction->ReplaceWith(left->InputAt(0)); + if (add->GetRight() == right) { + instruction->ReplaceWith(add->GetLeft()); + RecordSimplification(); + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } else if (add->GetLeft() == right) { + instruction->ReplaceWith(add->GetRight()); RecordSimplification(); instruction->GetBlock()->RemoveInstruction(instruction); return; - } else if (left->InputAt(0) == right) { - instruction->ReplaceWith(left->InputAt(1)); + } + } else if (right->IsAdd()) { + // Cases y - (x + y) = -x, and x - (x + y) = -y. + // Replace code patterns looking like + // ADD dst1, x, y ADD dst1, x, y + // SUB dst2, y, dst1 SUB dst2, x, dst1 + // with + // ADD dst1, x, y ADD dst1, x, y + // NEG x NEG y + // SUB instruction is not needed in this case, we may use + // one of inputs of ADD instead with a NEG. + // It is applicable to integral types only. + HAdd* add = right->AsAdd(); + DCHECK(DataType::IsIntegralType(type)); + if (add->GetRight() == left) { + HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(add->GetType(), add->GetLeft()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg); + RecordSimplification(); + return; + } else if (add->GetLeft() == left) { + HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(add->GetType(), add->GetRight()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg); + RecordSimplification(); + return; + } + } else if (left->IsSub()) { + // Case (x - y) - x = -y. + // Replace code patterns looking like + // SUB dst1, x, y + // SUB dst2, dst1, x + // with + // SUB dst1, x, y + // NEG y + // The second SUB is not needed in this case, we may use the second input of the first SUB + // instead with a NEG. + // It is applicable to integral types only. + HSub* sub = left->AsSub(); + DCHECK(DataType::IsIntegralType(type)); + if (sub->GetLeft() == right) { + HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(sub->GetType(), sub->GetRight()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, neg); + RecordSimplification(); + return; + } + } else if (right->IsSub()) { + // Case x - (x - y) = y. + // Replace code patterns looking like + // SUB dst1, x, y + // SUB dst2, x, dst1 + // with + // SUB dst1, x, y + // The second SUB is not needed in this case, we may use the second input of the first SUB. + // It is applicable to integral types only. + HSub* sub = right->AsSub(); + DCHECK(DataType::IsIntegralType(type)); + if (sub->GetLeft() == left) { + instruction->ReplaceWith(sub->GetRight()); RecordSimplification(); instruction->GetBlock()->RemoveInstruction(instruction); return; @@ -3215,7 +3288,7 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( HInstruction* left = instruction->GetLeft(); HInstruction* right = instruction->GetRight(); // Variable names as described above. - HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstant(); + HConstant* const2 = right->IsConstant() ? right->AsConstant() : left->AsConstantOrNull(); if (const2 == nullptr) { return false; } @@ -3231,7 +3304,7 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( } left = y->GetLeft(); - HConstant* const1 = left->IsConstant() ? left->AsConstant() : y->GetRight()->AsConstant(); + HConstant* const1 = left->IsConstant() ? 
left->AsConstant() : y->GetRight()->AsConstantOrNull(); if (const1 == nullptr) { return false; } diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index ddc3a867b8..01489f8bcb 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -54,7 +54,7 @@ inline bool HasShifterOperand(HInstruction* instr, InstructionSet isa) { // t3 = Sub(*, t2) inline bool IsSubRightSubLeftShl(HSub *sub) { HInstruction* right = sub->GetRight(); - return right->IsSub() && right->AsSub()->GetLeft()->IsShl();; + return right->IsSub() && right->AsSub()->GetLeft()->IsShl(); } } // namespace helpers diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index 774deec438..8357e57c1f 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -27,6 +27,7 @@ #include "gc/space/image_space.h" #include "image-inl.h" #include "intrinsic_objects.h" +#include "intrinsics_list.h" #include "nodes.h" #include "obj_ptr-inl.h" #include "scoped_thread_state_change-inl.h" @@ -43,10 +44,7 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { case Intrinsics::k ## Name: \ os << # Name; \ break; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef STATIC_INTRINSICS_LIST -#undef VIRTUAL_INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS } return os; diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 893cd04411..b6c7e1b997 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "code_generator.h" +#include "intrinsics_list.h" #include "nodes.h" #include "optimization.h" #include "parallel_move_resolver.h" @@ -48,9 +49,7 @@ class IntrinsicVisitor : public ValueObject { case Intrinsics::k ## Name: \ Visit ## Name(invoke); \ return; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Do not put a default case. That way the compiler will complain if we missed a case. @@ -60,11 +59,8 @@ class IntrinsicVisitor : public ValueObject { // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, ...) \ - virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ - } -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + virtual void Visit##Name([[maybe_unused]] HInvoke* invoke) {} + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS static void MoveArguments(HInvoke* invoke, @@ -254,11 +250,9 @@ class VarHandleOptimizations : public IntrinsicOptimizations { // intrinsic to exploit e.g. no side-effects or exceptions, but otherwise not handled // by this architecture-specific intrinsics code generator. Eventually it is implemented // as a true method call. 
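The UNIMPLEMENTED_INTRINSIC macro reworked just below is part of this change's wider move from ATTRIBUTE_UNUSED to the standard [[maybe_unused]] attribute. A self-contained sketch of what one instantiation roughly expands to after the change (the Foo architecture and the intrinsic name are invented for illustration):

class HInvoke;  // forward declaration only; never dereferenced here

struct IntrinsicLocationsBuilderFoo { void VisitMathAbsInt(HInvoke* invoke); };
struct IntrinsicCodeGeneratorFoo { void VisitMathAbsInt(HInvoke* invoke); };

// Rough expansion of UNIMPLEMENTED_INTRINSIC(Foo, MathAbsInt):
void IntrinsicLocationsBuilderFoo::VisitMathAbsInt([[maybe_unused]] HInvoke* invoke) {}
void IntrinsicCodeGeneratorFoo::VisitMathAbsInt([[maybe_unused]] HInvoke* invoke) {}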
-#define UNIMPLEMENTED_INTRINSIC(Arch, Name) \ -void IntrinsicLocationsBuilder ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ -} \ -void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ -} +#define UNIMPLEMENTED_INTRINSIC(Arch, Name) \ + void IntrinsicLocationsBuilder##Arch::Visit##Name([[maybe_unused]] HInvoke* invoke) {} \ + void IntrinsicCodeGenerator##Arch::Visit##Name([[maybe_unused]] HInvoke* invoke) {} // Defines a list of unreached intrinsics: that is, method calls that are recognized as // an intrinsic, and then always converted into HIR instructions before they reach any diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index d2dbaa32e3..2ec2134fb1 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2582,7 +2582,7 @@ static constexpr int32_t kSystemArrayCopyCharThreshold = 192; static void SetSystemArrayCopyLocationRequires(LocationSummary* locations, uint32_t at, HInstruction* input) { - HIntConstant* const_input = input->AsIntConstant(); + HIntConstant* const_input = input->AsIntConstantOrNull(); if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) { locations->SetInAt(at, Location::RequiresRegister()); } else { @@ -2593,8 +2593,8 @@ static void SetSystemArrayCopyLocationRequires(LocationSummary* locations, void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstantOrNull(); // The positions must be non-negative. if ((src_pos != nullptr && src_pos->GetValue() < 0) || @@ -2605,7 +2605,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { // The length must be >= 0 and not so long that we would (currently) prefer libcore's // native implementation. - HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (length != nullptr) { int32_t len = length->GetValue(); if (len < 0 || len > kSystemArrayCopyCharThreshold) { @@ -2903,8 +2903,8 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); // The positions must be non-negative. if ((src_pos != nullptr && src_pos->GetValue() < 0) || @@ -2914,7 +2914,7 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { } // The length must be >= 0. 
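The length lookup that follows is one of many call sites in this change converted from AsIntConstant() to AsIntConstantOrNull(), used wherever the input may not actually be a constant. A minimal mock of that convention as inferred from these call sites (hypothetical types, not ART's real class hierarchy; the plain As* accessors appear to be reserved for callers that already know the kind):

#include <cstdint>

struct HInstruction {
  virtual ~HInstruction() = default;
  virtual bool IsIntConstant() const { return false; }
};
struct HIntConstant : HInstruction {
  explicit HIntConstant(int32_t v) : value(v) {}
  bool IsIntConstant() const override { return true; }
  int32_t value;
};
// The *OrNull() form probes and may return nullptr instead of asserting the kind.
HIntConstant* AsIntConstantOrNull(HInstruction* instruction) {
  return instruction->IsIntConstant() ? static_cast<HIntConstant*>(instruction) : nullptr;
}

// Caller pattern mirroring the converted sites: probe, then branch on nullptr.
bool IsKnownNonNegative(HInstruction* pos) {
  HIntConstant* c = AsIntConstantOrNull(pos);
  return c != nullptr && c->value >= 0;
}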
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (length != nullptr) { int32_t len = length->GetValue(); if (len < 0 || len >= kSystemArrayCopyThreshold) { @@ -3009,8 +3009,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { __ B(intrinsic_slow_path->GetEntryLabel(), eq); } // Checked when building locations. - DCHECK(!optimizations.GetDestinationIsSource() - || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue())); + DCHECK(!optimizations.GetDestinationIsSource() || + (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue())); } else { if (!optimizations.GetDestinationIsSource()) { __ Cmp(src, dest); @@ -3676,7 +3676,7 @@ void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) { locations->SetInAt(0, Location::Any()); } -void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicCodeGeneratorARM64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {} void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) { if (!codegen_->GetInstructionSetFeatures().HasCRC()) { @@ -4711,8 +4711,8 @@ static void GenerateVarHandleTarget(HInvoke* invoke, LocationFrom(target.object), method.X(), ArtField::DeclaringClassOffset().Int32Value(), - /*fixup_label=*/ nullptr, - gCompilerReadBarrierOption); + /*fixup_label=*/nullptr, + GetCompilerReadBarrierOption()); } } } else { diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index a0ccf87f7b..b20cea65f4 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "intrinsics.h" +#include "intrinsics_list.h" namespace vixl { namespace aarch64 { @@ -47,9 +48,7 @@ class IntrinsicLocationsBuilderARM64 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether @@ -72,9 +71,7 @@ class IntrinsicCodeGeneratorARM64 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS private: diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 266b5bc799..d31593cf9f 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -1252,9 +1252,9 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { return; } - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); - HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) { locations->SetInAt(1, Location::RequiresRegister()); @@ -2653,7 +2653,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) { locations->SetInAt(0, Location::Any()); } -void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {} void IntrinsicLocationsBuilderARMVIXL::VisitIntegerDivideUnsigned(HInvoke* invoke) { CreateIntIntToIntSlowPathCallLocations(allocator_, invoke); @@ -4351,7 +4351,7 @@ static void GenerateVarHandleTarget(HInvoke* invoke, LocationFrom(target.object), method, ArtField::DeclaringClassOffset().Int32Value(), - gCompilerReadBarrierOption); + GetCompilerReadBarrierOption()); } } } else { diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h index 54475bcc7e..f517d21c9d 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.h +++ b/compiler/optimizing/intrinsics_arm_vixl.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "intrinsics.h" +#include "intrinsics_list.h" #include "utils/arm/assembler_arm_vixl.h" namespace art HIDDEN { @@ -36,9 +37,7 @@ class IntrinsicLocationsBuilderARMVIXL final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether @@ -63,9 +62,7 @@ class IntrinsicCodeGeneratorARMVIXL final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS private: diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc new file mode 100644 index 0000000000..668b3862ad --- /dev/null +++ b/compiler/optimizing/intrinsics_riscv64.cc @@ -0,0 +1,354 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "intrinsics_riscv64.h" + +#include "code_generator_riscv64.h" + +namespace art HIDDEN { +namespace riscv64 { + +bool IntrinsicLocationsBuilderRISCV64::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + LocationSummary* res = invoke->GetLocations(); + if (res == nullptr) { + return false; + } + return res->Intrinsified(); +} + +Riscv64Assembler* IntrinsicCodeGeneratorRISCV64::GetAssembler() { + return codegen_->GetAssembler(); +} + +#define __ GetAssembler()-> + +static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + __ FMvXD(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + __ FMvDX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + __ FMvXW(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsFpuRegister<FRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitFloatIntBitsToFloat(HInvoke* 
invoke) { + LocationSummary* locations = invoke->GetLocations(); + __ FMvWX(locations->Out().AsFpuRegister<FRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + XRegister out = locations->Out().AsRegister<XRegister>(); + __ FClassD(out, locations->InAt(0).AsFpuRegister<FRegister>()); + __ Andi(out, out, kPositiveInfinity | kNegativeInfinity); + __ Snez(out, out); +} + +void IntrinsicLocationsBuilderRISCV64::VisitFloatIsInfinite(HInvoke* invoke) { + CreateFPToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitFloatIsInfinite(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + XRegister out = locations->Out().AsRegister<XRegister>(); + __ FClassS(out, locations->InAt(0).AsFpuRegister<FRegister>()); + __ Andi(out, out, kPositiveInfinity | kNegativeInfinity); + __ Snez(out, out); +} + +static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +template <typename EmitOp> +void EmitMemoryPeek(HInvoke* invoke, EmitOp&& emit_op) { + LocationSummary* locations = invoke->GetLocations(); + emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekByte(HInvoke* invoke) { + EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lb(rd, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) { + EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lw(rd, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) { + EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Ld(rd, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) { + EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lh(rd, rs1, 0); }); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +template <typename EmitOp> +void EmitMemoryPoke(HInvoke* invoke, EmitOp&& emit_op) { + LocationSummary* locations = invoke->GetLocations(); + emit_op(locations->InAt(1).AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeByte(HInvoke* 
invoke) { + CreateIntIntToVoidLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeByte(HInvoke* invoke) { + EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sb(rs2, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) { + EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sw(rs2, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) { + EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sd(rs2, rs1, 0); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) { + EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sh(rs2, rs1, 0); }); +} + +template <typename EmitOp> +void EmitIntegralUnOp(HInvoke* invoke, EmitOp&& emit_op) { + LocationSummary* locations = invoke->GetLocations(); + emit_op(locations->Out().AsRegister<XRegister>(), locations->InAt(0).AsRegister<XRegister>()); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + // There is no 32-bit reverse bytes instruction. + __ Rev8(rd, rs1); + __ Srai(rd, rd, 32); + }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongReverseBytes(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Rev8(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitShortReverseBytes(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + // There is no 16-bit reverse bytes instruction. 
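The sequence emitted just below leans on Rev8 reversing all eight bytes of the 64-bit register: the two interesting bytes land at the top, and the arithmetic shift by 48 both moves them back down and restores the sign extension a Java short needs (the 32-bit case above uses the same idea with a shift of 32). A rough C++ model of the 16-bit sequence (function name invented; __builtin_bswap64 stands in for Rev8):

#include <cstdint>

int64_t ShortReverseBytesModel(int64_t rs1) {
  uint64_t reversed = __builtin_bswap64(static_cast<uint64_t>(rs1));  // Rev8 rd, rs1
  return static_cast<int64_t>(reversed) >> 48;                        // Srai rd, rd, 48
}
// E.g. ShortReverseBytesModel(0x1280) == -32750, matching Short.reverseBytes((short) 0x1280).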
+ __ Rev8(rd, rs1); + __ Srai(rd, rd, 48); + }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerBitCount(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerBitCount(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpopw(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongBitCount(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongBitCount(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpop(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + __ Clzw(tmp, rs1); + __ Li(tmp2, INT64_C(-0x80000000)); + __ Srlw(tmp2, tmp2, tmp); + __ And(rd, rs1, tmp2); // Make sure the result is zero if the input is zero. + }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongHighestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongHighestOneBit(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + XRegister tmp2 = srs.AllocateXRegister(); + __ Clz(tmp, rs1); + __ Li(tmp2, INT64_C(-0x8000000000000000)); + __ Srl(tmp2, tmp2, tmp); + __ And(rd, rs1, tmp2); // Make sure the result is zero if the input is zero. 
+ }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ NegW(tmp, rs1); + __ And(rd, rs1, tmp); + }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongLowestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongLowestOneBit(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { + ScratchRegisterScope srs(GetAssembler()); + XRegister tmp = srs.AllocateXRegister(); + __ Neg(tmp, rs1); + __ And(rd, rs1, tmp); + }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clzw(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clz(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctzw(rd, rs1); }); +} + +void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { + EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctz(rd, rs1); }); +} + +#define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(RISCV64, Name) +UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(MARK_UNIMPLEMENTED); +#undef MARK_UNIMPLEMENTED + +UNREACHABLE_INTRINSICS(RISCV64) + +} // namespace riscv64 +} // namespace art diff --git a/compiler/optimizing/intrinsics_riscv64.h b/compiler/optimizing/intrinsics_riscv64.h new file mode 100644 index 0000000000..49c057de2b --- /dev/null +++ b/compiler/optimizing/intrinsics_riscv64.h @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_ + +#include "base/macros.h" +#include "intrinsics.h" +#include "intrinsics_list.h" + +namespace art HIDDEN { + +class ArenaAllocator; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace riscv64 { + +class CodeGeneratorRISCV64; +class Riscv64Assembler; + +class IntrinsicLocationsBuilderRISCV64 final : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderRISCV64(ArenaAllocator* allocator, + CodeGeneratorRISCV64* codegen) + : allocator_(allocator), codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, ...) \ + void Visit##Name(HInvoke* invoke) override; + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* const allocator_; + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderRISCV64); +}; + +class IntrinsicCodeGeneratorRISCV64 final : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorRISCV64(CodeGeneratorRISCV64* codegen) : codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, ...) \ + void Visit##Name(HInvoke* invoke); + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef OPTIMIZING_INTRINSICS + + private: + Riscv64Assembler* GetAssembler(); + + ArenaAllocator* GetAllocator(); + + CodeGeneratorRISCV64* const codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorRISCV64); +}; + +} // namespace riscv64 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_RISCV64_H_ diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index d2072201f8..02f312e74e 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -394,7 +394,6 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { } HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(static_or_direct != nullptr); LocationSummary* locations = new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); @@ -774,9 +773,9 @@ void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) { static void CreateSystemArrayCopyLocations(HInvoke* invoke) { // We need at least two of the positions or length to be an integer constant, // or else we won't have enough free registers. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); - HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); int num_constants = ((src_pos != nullptr) ? 1 : 0) @@ -1205,7 +1204,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, HInstruction* code_point = invoke->InputAt(1); if (code_point->IsIntConstant()) { if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > - std::numeric_limits<uint16_t>::max()) { + std::numeric_limits<uint16_t>::max()) { // Always needs the slow-path. 
We could directly dispatch to it, but this case should be // rare, so for simplicity just put the full slow-path down and branch unconditionally. slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); @@ -1445,7 +1444,7 @@ void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register obj = locations->InAt(0).AsRegister<Register>(); Location srcBegin = locations->InAt(1); int srcBegin_value = - srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; + srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; Register srcEnd = locations->InAt(2).AsRegister<Register>(); Register dst = locations->InAt(3).AsRegister<Register>(); Register dstBegin = locations->InAt(4).AsRegister<Register>(); @@ -3504,7 +3503,7 @@ void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) { locations->SetInAt(0, Location::Any()); } -void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicCodeGeneratorX86::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {} void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) { LocationSummary* locations = new (allocator_) LocationSummary(invoke, @@ -3781,7 +3780,7 @@ static Register GenerateVarHandleFieldReference(HInvoke* invoke, Location::RegisterLocation(temp), Address(temp, declaring_class_offset), /* fixup_label= */ nullptr, - gCompilerReadBarrierOption); + GetCompilerReadBarrierOption()); return temp; } @@ -3860,7 +3859,7 @@ static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) { Address field_addr(ref, offset, TIMES_1, 0); // Load the value from the field - if (type == DataType::Type::kReference && gCompilerReadBarrierOption == kWithReadBarrier) { + if (type == DataType::Type::kReference && GetCompilerReadBarrierOption() == kWithReadBarrier) { codegen->GenerateReferenceLoadWithBakerReadBarrier( invoke, out, ref, field_addr, /* needs_null_check= */ false); } else if (type == DataType::Type::kInt64 && diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index 77c236d244..fc2f0e3fbd 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "intrinsics.h" +#include "intrinsics_list.h" namespace art HIDDEN { @@ -39,9 +40,7 @@ class IntrinsicLocationsBuilderX86 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether @@ -64,9 +63,7 @@ class IntrinsicCodeGeneratorX86 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS private: diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 9d0d5f155e..842af6b73f 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -222,34 +222,34 @@ static void GenIsInfinite(LocationSummary* locations, double kPositiveInfinity = std::numeric_limits<double>::infinity(); double kNegativeInfinity = -1 * kPositiveInfinity; - __ xorq(output, output); - __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity)); - __ j(kNotEqual, &done1); - __ j(kParityEven, &done2); - __ movq(output, Immediate(1)); - __ jmp(&done2); - __ Bind(&done1); - __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity)); - __ j(kNotEqual, &done2); - __ j(kParityEven, &done2); - __ movq(output, Immediate(1)); - __ Bind(&done2); + __ xorq(output, output); + __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity)); + __ j(kNotEqual, &done1); + __ j(kParityEven, &done2); + __ movq(output, Immediate(1)); + __ jmp(&done2); + __ Bind(&done1); + __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity)); + __ j(kNotEqual, &done2); + __ j(kParityEven, &done2); + __ movq(output, Immediate(1)); + __ Bind(&done2); } else { float kPositiveInfinity = std::numeric_limits<float>::infinity(); float kNegativeInfinity = -1 * kPositiveInfinity; - __ xorl(output, output); - __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity)); - __ j(kNotEqual, &done1); - __ j(kParityEven, &done2); - __ movl(output, Immediate(1)); - __ jmp(&done2); - __ Bind(&done1); - __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity)); - __ j(kNotEqual, &done2); - __ j(kParityEven, &done2); - __ movl(output, Immediate(1)); - __ Bind(&done2); + __ xorl(output, output); + __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity)); + __ j(kNotEqual, &done1); + __ j(kParityEven, &done2); + __ movl(output, Immediate(1)); + __ jmp(&done2); + __ Bind(&done1); + __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity)); + __ j(kNotEqual, &done2); + __ j(kParityEven, &done2); + __ movl(output, Immediate(1)); + __ Bind(&done2); } } @@ -617,8 +617,8 @@ void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) { static void CreateSystemArrayCopyLocations(HInvoke* invoke) { // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. - HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); - HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); + HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); + HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull(); // The positions must be non-negative. if ((src_pos != nullptr && src_pos->GetValue() < 0) || @@ -628,7 +628,7 @@ static void CreateSystemArrayCopyLocations(HInvoke* invoke) { } // The length must be > 0. 
- HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); + HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (length != nullptr) { int32_t len = length->GetValue(); if (len < 0) { @@ -1424,7 +1424,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, HInstruction* code_point = invoke->InputAt(1); if (code_point->IsIntConstant()) { if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > - std::numeric_limits<uint16_t>::max()) { + std::numeric_limits<uint16_t>::max()) { // Always needs the slow-path. We could directly dispatch to it, but this case should be // rare, so for simplicity just put the full slow-path down and branch unconditionally. slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); @@ -1655,7 +1655,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); Location srcBegin = locations->InAt(1); int srcBegin_value = - srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; + srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0; CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>(); CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>(); CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>(); @@ -1871,7 +1871,7 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { static void GenUnsafeGet(HInvoke* invoke, DataType::Type type, - bool is_volatile ATTRIBUTE_UNUSED, + [[maybe_unused]] bool is_volatile, CodeGeneratorX86_64* codegen) { X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); LocationSummary* locations = invoke->GetLocations(); @@ -3249,7 +3249,7 @@ void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) { locations->SetInAt(0, Location::Any()); } -void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } +void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {} static void CreateDivideUnsignedLocations(HInvoke* invoke, ArenaAllocator* allocator) { LocationSummary* locations = @@ -3770,8 +3770,8 @@ static void GenerateVarHandleTarget(HInvoke* invoke, instr_codegen->GenerateGcRootFieldLoad(invoke, Location::RegisterLocation(target.object), Address(method, ArtField::DeclaringClassOffset()), - /*fixup_label=*/ nullptr, - gCompilerReadBarrierOption); + /*fixup_label=*/nullptr, + GetCompilerReadBarrierOption()); } } } else { diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index 59fe815a94..d0ee6f622d 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ b/compiler/optimizing/intrinsics_x86_64.h @@ -19,6 +19,7 @@ #include "base/macros.h" #include "intrinsics.h" +#include "intrinsics_list.h" namespace art HIDDEN { @@ -39,9 +40,7 @@ class IntrinsicLocationsBuilderX86_64 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS // Check whether an invoke is an intrinsic, and if so, create a location summary. 
Returns whether @@ -64,9 +63,7 @@ class IntrinsicCodeGeneratorX86_64 final : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) override; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS private: diff --git a/compiler/optimizing/jit_patches_arm64.cc b/compiler/optimizing/jit_patches_arm64.cc new file mode 100644 index 0000000000..76ba182acb --- /dev/null +++ b/compiler/optimizing/jit_patches_arm64.cc @@ -0,0 +1,104 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generation_data.h" +#include "gc_root.h" +#include "jit_patches_arm64.h" + +namespace art HIDDEN { + +namespace arm64 { + +vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateUint32Literal( + uint32_t value) { + return uint32_literals_.GetOrCreate( + value, + [this, value]() { + return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(value); + }); +} + +vixl::aarch64::Literal<uint64_t>* JitPatchesARM64::DeduplicateUint64Literal( + uint64_t value) { + return uint64_literals_.GetOrCreate( + value, + [this, value]() { + return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint64_t>(value); + }); +} + +static void PatchJitRootUse(uint8_t* code, + const uint8_t* roots_data, + vixl::aarch64::Literal<uint32_t>* literal, + uint64_t index_in_table) { + uint32_t literal_offset = literal->GetOffset(); + uintptr_t address = + reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); + uint8_t* data = code + literal_offset; + reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address); +} + +void JitPatchesARM64::EmitJitRootPatches( + uint8_t* code, + const uint8_t* roots_data, + const CodeGenerationData& code_generation_data) const { + for (const auto& entry : jit_string_patches_) { + const StringReference& string_reference = entry.first; + vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; + uint64_t index_in_table = code_generation_data.GetJitStringRootIndex(string_reference); + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); + } + for (const auto& entry : jit_class_patches_) { + const TypeReference& type_reference = entry.first; + vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; + uint64_t index_in_table = code_generation_data.GetJitClassRootIndex(type_reference); + PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); + } +} + +vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateBootImageAddressLiteral( + uint64_t address) { + return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); +} + +vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateJitStringLiteral( + const DexFile& dex_file, + dex::StringIndex string_index, + 
Handle<mirror::String> handle, + CodeGenerationData* code_generation_data) { + code_generation_data->ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); + return jit_string_patches_.GetOrCreate( + StringReference(&dex_file, string_index), + [this]() { + return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); + }); +} + +vixl::aarch64::Literal<uint32_t>* JitPatchesARM64::DeduplicateJitClassLiteral( + const DexFile& dex_file, + dex::TypeIndex type_index, + Handle<mirror::Class> handle, + CodeGenerationData* code_generation_data) { + code_generation_data->ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); + return jit_class_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { + return GetVIXLAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); + }); +} + +} // namespace arm64 +} // namespace art diff --git a/compiler/optimizing/jit_patches_arm64.h b/compiler/optimizing/jit_patches_arm64.h new file mode 100644 index 0000000000..f928723f58 --- /dev/null +++ b/compiler/optimizing/jit_patches_arm64.h @@ -0,0 +1,107 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_ +#define ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_ + +#include "base/arena_allocator.h" +#include "base/arena_containers.h" +#include "dex/dex_file.h" +#include "dex/string_reference.h" +#include "dex/type_reference.h" +#include "handle.h" +#include "mirror/class.h" +#include "mirror/string.h" +#include "utils/arm64/assembler_arm64.h" + +// TODO(VIXL): Make VIXL compile with -Wshadow. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wshadow" +#include "aarch64/disasm-aarch64.h" +#include "aarch64/macro-assembler-aarch64.h" +#pragma GCC diagnostic pop + +namespace art HIDDEN { + +class CodeGenerationData; + +namespace arm64 { + +/** + * Helper for emitting string or class literals into JIT generated code, + * which can be shared between different compilers. 
+ */ +class JitPatchesARM64 { + public: + JitPatchesARM64(Arm64Assembler* assembler, ArenaAllocator* allocator) : + assembler_(assembler), + uint32_literals_(std::less<uint32_t>(), + allocator->Adapter(kArenaAllocCodeGenerator)), + uint64_literals_(std::less<uint64_t>(), + allocator->Adapter(kArenaAllocCodeGenerator)), + jit_string_patches_(StringReferenceValueComparator(), + allocator->Adapter(kArenaAllocCodeGenerator)), + jit_class_patches_(TypeReferenceValueComparator(), + allocator->Adapter(kArenaAllocCodeGenerator)) { + } + + using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>; + using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>; + using StringToLiteralMap = ArenaSafeMap<StringReference, + vixl::aarch64::Literal<uint32_t>*, + StringReferenceValueComparator>; + using TypeToLiteralMap = ArenaSafeMap<TypeReference, + vixl::aarch64::Literal<uint32_t>*, + TypeReferenceValueComparator>; + + vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value); + vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value); + vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); + vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral( + const DexFile& dex_file, + dex::StringIndex string_index, + Handle<mirror::String> handle, + CodeGenerationData* code_generation_data); + vixl::aarch64::Literal<uint32_t>* DeduplicateJitClassLiteral( + const DexFile& dex_file, + dex::TypeIndex type_index, + Handle<mirror::Class> handle, + CodeGenerationData* code_generation_data); + + void EmitJitRootPatches(uint8_t* code, + const uint8_t* roots_data, + const CodeGenerationData& code_generation_data) const; + + Arm64Assembler* GetAssembler() const { return assembler_; } + vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); } + + private: + Arm64Assembler* assembler_; + // Deduplication map for 32-bit literals, used for JIT for boot image addresses. + Uint32ToLiteralMap uint32_literals_; + // Deduplication map for 64-bit literals, used for JIT for method address or method code. + Uint64ToLiteralMap uint64_literals_; + // Patches for string literals in JIT compiled code. + StringToLiteralMap jit_string_patches_; + // Patches for class literals in JIT compiled code. 
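For context on how these literal maps are consumed: EmitJitRootPatches in the .cc file above walks them and rewrites each recorded 32-bit literal slot so a later literal load yields the absolute address of the matching entry in the JIT roots table. A standalone sketch of that patching arithmetic (simplified, invented names; kGcRootSize stands in for sizeof(GcRoot<mirror::Object>), assumed to be 4 here):

#include <cstdint>
#include <cstring>

constexpr uint64_t kGcRootSize = 4;  // assumed size of a compressed GC root

void PatchRootSlot(uint8_t* code, const uint8_t* roots_data,
                   uint32_t literal_offset, uint64_t index_in_table) {
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * kGcRootSize;
  uint32_t narrowed = static_cast<uint32_t>(address);  // the real code dchecks this fits
  std::memcpy(code + literal_offset, &narrowed, sizeof(narrowed));  // patch the literal slot
}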
+ TypeToLiteralMap jit_class_patches_; +}; + +} // namespace arm64 + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_JIT_PATCHES_ARM64_H_ diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 01daa23511..6f4f2b6cf6 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -55,6 +55,7 @@ void LinearizeTest::TestCode(const std::vector<uint16_t>& data, } TEST_F(LinearizeTest, CFG1) { + TEST_DISABLED_FOR_RISCV64(); // Structure of this graph (+ are back edges) // Block0 // | @@ -80,6 +81,7 @@ TEST_F(LinearizeTest, CFG1) { } TEST_F(LinearizeTest, CFG2) { + TEST_DISABLED_FOR_RISCV64(); // Structure of this graph (+ are back edges) // Block0 // | @@ -105,6 +107,7 @@ TEST_F(LinearizeTest, CFG2) { } TEST_F(LinearizeTest, CFG3) { + TEST_DISABLED_FOR_RISCV64(); // Structure of this graph (+ are back edges) // Block0 // | @@ -132,6 +135,7 @@ TEST_F(LinearizeTest, CFG3) { } TEST_F(LinearizeTest, CFG4) { + TEST_DISABLED_FOR_RISCV64(); /* Structure of this graph (+ are back edges) // Block0 // | @@ -162,6 +166,7 @@ TEST_F(LinearizeTest, CFG4) { } TEST_F(LinearizeTest, CFG5) { + TEST_DISABLED_FOR_RISCV64(); /* Structure of this graph (+ are back edges) // Block0 // | @@ -192,6 +197,7 @@ TEST_F(LinearizeTest, CFG5) { } TEST_F(LinearizeTest, CFG6) { + TEST_DISABLED_FOR_RISCV64(); // Block0 // | // Block1 @@ -218,6 +224,7 @@ TEST_F(LinearizeTest, CFG6) { } TEST_F(LinearizeTest, CFG7) { + TEST_DISABLED_FOR_RISCV64(); // Structure of this graph (+ are back edges) // Block0 // | diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index fb1a23eef4..7e488ba41d 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -47,6 +47,7 @@ HGraph* LiveRangesTest::BuildGraph(const std::vector<uint16_t>& data) { } TEST_F(LiveRangesTest, CFG1) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * return 0; @@ -81,6 +82,7 @@ TEST_F(LiveRangesTest, CFG1) { } TEST_F(LiveRangesTest, CFG2) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; @@ -125,6 +127,7 @@ TEST_F(LiveRangesTest, CFG2) { } TEST_F(LiveRangesTest, CFG3) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; @@ -194,6 +197,7 @@ TEST_F(LiveRangesTest, CFG3) { } TEST_F(LiveRangesTest, Loop1) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; @@ -270,6 +274,7 @@ TEST_F(LiveRangesTest, Loop1) { } TEST_F(LiveRangesTest, Loop2) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; @@ -341,6 +346,7 @@ TEST_F(LiveRangesTest, Loop2) { } TEST_F(LiveRangesTest, CFG4) { + TEST_DISABLED_FOR_RISCV64(); /* * Test the following snippet: * var a = 0; diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 0b421cf9e6..6af07aea4e 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -70,6 +70,7 @@ void LivenessTest::TestCode(const std::vector<uint16_t>& data, const char* expec } TEST_F(LivenessTest, CFG1) { + TEST_DISABLED_FOR_RISCV64(); const char* expected = "Block 0\n" " live in: (0)\n" @@ -93,6 +94,7 @@ TEST_F(LivenessTest, CFG1) { } TEST_F(LivenessTest, CFG2) { + TEST_DISABLED_FOR_RISCV64(); const char* expected = "Block 0\n" " live in: (0)\n" @@ -115,6 +117,7 @@ TEST_F(LivenessTest, CFG2) { } TEST_F(LivenessTest, CFG3) { + TEST_DISABLED_FOR_RISCV64(); const char* expected = "Block 0\n" // entry block " 
live in: (000)\n" @@ -144,6 +147,7 @@ TEST_F(LivenessTest, CFG3) { } TEST_F(LivenessTest, CFG4) { + TEST_DISABLED_FOR_RISCV64(); // var a; // if (0 == 0) { // a = 5; @@ -192,6 +196,7 @@ TEST_F(LivenessTest, CFG4) { } TEST_F(LivenessTest, CFG5) { + TEST_DISABLED_FOR_RISCV64(); // var a = 0; // if (0 == 0) { // } else { @@ -237,6 +242,7 @@ TEST_F(LivenessTest, CFG5) { } TEST_F(LivenessTest, Loop1) { + TEST_DISABLED_FOR_RISCV64(); // Simple loop with one preheader and one back edge. // var a = 0; // while (a == a) { @@ -283,6 +289,7 @@ TEST_F(LivenessTest, Loop1) { } TEST_F(LivenessTest, Loop3) { + TEST_DISABLED_FOR_RISCV64(); // Test that the returned value stays live in a preceding loop. // var a = 0; // while (a == a) { @@ -330,6 +337,7 @@ TEST_F(LivenessTest, Loop3) { TEST_F(LivenessTest, Loop4) { + TEST_DISABLED_FOR_RISCV64(); // Make sure we support a preheader of a loop not being the first predecessor // in the predecessor list of the header. // var a = 0; @@ -382,6 +390,7 @@ TEST_F(LivenessTest, Loop4) { } TEST_F(LivenessTest, Loop5) { + TEST_DISABLED_FOR_RISCV64(); // Make sure we create a preheader of a loop when a header originally has two // incoming blocks and one back edge. // Bitsets are made of: @@ -438,6 +447,7 @@ TEST_F(LivenessTest, Loop5) { } TEST_F(LivenessTest, Loop6) { + TEST_DISABLED_FOR_RISCV64(); // Bitsets are made of: // (constant0, constant4, constant5, phi in block 2) const char* expected = @@ -489,6 +499,7 @@ TEST_F(LivenessTest, Loop6) { TEST_F(LivenessTest, Loop7) { + TEST_DISABLED_FOR_RISCV64(); // Bitsets are made of: // (constant0, constant4, constant5, phi in block 2, phi in block 6) const char* expected = @@ -543,6 +554,7 @@ TEST_F(LivenessTest, Loop7) { } TEST_F(LivenessTest, Loop8) { + TEST_DISABLED_FOR_RISCV64(); // var a = 0; // while (a == a) { // a = a + a; diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc index f1c50ac03c..75000c8b91 100644 --- a/compiler/optimizing/load_store_analysis.cc +++ b/compiler/optimizing/load_store_analysis.cc @@ -41,7 +41,7 @@ static bool CanBinaryOpAndIndexAlias(const HBinaryOperation* idx1, // We currently only support Add and Sub operations. return true; } - if (idx1->AsBinaryOperation()->GetLeastConstantLeft() != idx2) { + if (idx1->GetLeastConstantLeft() != idx2) { // Cannot analyze [i+CONST1] and [j]. return true; } @@ -51,9 +51,9 @@ static bool CanBinaryOpAndIndexAlias(const HBinaryOperation* idx1, // Since 'i' are the same in [i+CONST] and [i], // further compare [CONST] and [0]. - int64_t l1 = idx1->IsAdd() ? - idx1->GetConstantRight()->AsIntConstant()->GetValue() : - -idx1->GetConstantRight()->AsIntConstant()->GetValue(); + int64_t l1 = idx1->IsAdd() + ? idx1->GetConstantRight()->AsIntConstant()->GetValue() + : -idx1->GetConstantRight()->AsIntConstant()->GetValue(); int64_t l2 = 0; int64_t h1 = l1 + (vector_length1 - 1); int64_t h2 = l2 + (vector_length2 - 1); @@ -68,8 +68,7 @@ static bool CanBinaryOpsAlias(const HBinaryOperation* idx1, // We currently only support Add and Sub operations. return true; } - if (idx1->AsBinaryOperation()->GetLeastConstantLeft() != - idx2->AsBinaryOperation()->GetLeastConstantLeft()) { + if (idx1->GetLeastConstantLeft() != idx2->GetLeastConstantLeft()) { // Cannot analyze [i+CONST1] and [j+CONST2]. return true; } @@ -80,12 +79,12 @@ static bool CanBinaryOpsAlias(const HBinaryOperation* idx1, // Since 'i' are the same in [i+CONST1] and [i+CONST2], // further compare [CONST1] and [CONST2]. - int64_t l1 = idx1->IsAdd() ? 
- idx1->GetConstantRight()->AsIntConstant()->GetValue() : - -idx1->GetConstantRight()->AsIntConstant()->GetValue(); - int64_t l2 = idx2->IsAdd() ? - idx2->GetConstantRight()->AsIntConstant()->GetValue() : - -idx2->GetConstantRight()->AsIntConstant()->GetValue(); + int64_t l1 = idx1->IsAdd() + ? idx1->GetConstantRight()->AsIntConstant()->GetValue() + : -idx1->GetConstantRight()->AsIntConstant()->GetValue(); + int64_t l2 = idx2->IsAdd() + ? idx2->GetConstantRight()->AsIntConstant()->GetValue() + : -idx2->GetConstantRight()->AsIntConstant()->GetValue(); int64_t h1 = l1 + (vector_length1 - 1); int64_t h2 = l2 + (vector_length2 - 1); return CanIntegerRangesOverlap(l1, h1, l2, h2); @@ -269,6 +268,13 @@ bool HeapLocationCollector::CanArrayElementsAlias(const HInstruction* idx1, } bool LoadStoreAnalysis::Run() { + // Currently load_store analysis can't handle predicated load/stores; specifically pairs of + // memory operations with different predicates. + // TODO: support predicated SIMD. + if (graph_->HasPredicatedSIMD()) { + return false; + } + for (HBasicBlock* block : graph_->GetReversePostOrder()) { heap_location_collector_.VisitBasicBlock(block); } diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h index c46a5b9cc1..ee425454a0 100644 --- a/compiler/optimizing/load_store_analysis.h +++ b/compiler/optimizing/load_store_analysis.h @@ -610,6 +610,7 @@ class HeapLocationCollector : public HGraphVisitor { } void VisitVecLoad(HVecLoad* instruction) override { + DCHECK(!instruction->IsPredicated()); HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetPackedType(); @@ -618,6 +619,7 @@ class HeapLocationCollector : public HGraphVisitor { } void VisitVecStore(HVecStore* instruction) override { + DCHECK(!instruction->IsPredicated()); HInstruction* array = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); DataType::Type type = instruction->GetPackedType(); diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc index 865febbd31..8c6812f184 100644 --- a/compiler/optimizing/load_store_analysis_test.cc +++ b/compiler/optimizing/load_store_analysis_test.cc @@ -897,7 +897,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); left->AddInstruction(goto_left); @@ -1007,7 +1007,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape2) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); left->AddInstruction(goto_left); @@ -1131,7 +1131,7 @@ TEST_F(LoadStoreAnalysisTest, PartialEscape3) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); left->AddInstruction(goto_left); @@ -1412,7 +1412,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacentNoPredicated) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* 
goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); left->AddInstruction(goto_left); @@ -1514,7 +1514,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacent) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); left->AddInstruction(goto_left); @@ -1626,7 +1626,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); left->AddInstruction(call_left); left->AddInstruction(goto_left); @@ -1653,7 +1653,7 @@ TEST_F(LoadStoreAnalysisTest, TotalEscape) { graph_->GetDexFile(), 0); HInstruction* goto_right = new (GetAllocator()) HGoto(); - call_right->AsInvoke()->SetRawInputAt(0, new_inst); + call_right->SetRawInputAt(0, new_inst); right->AddInstruction(write_right); right->AddInstruction(call_right); right->AddInstruction(goto_right); @@ -1813,7 +1813,7 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_left = new (GetAllocator()) HGoto(); - call_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_left->SetRawInputAt(0, new_inst); high_left->AddInstruction(call_left); high_left->AddInstruction(goto_left); @@ -1870,7 +1870,7 @@ TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) { HInvokeStaticOrDirect::ClinitCheckRequirement::kNone, !graph_->IsDebuggable()); HInstruction* goto_low_left = new (GetAllocator()) HGoto(); - call_low_left->AsInvoke()->SetRawInputAt(0, new_inst); + call_low_left->SetRawInputAt(0, new_inst); low_left->AddInstruction(call_low_left); low_left->AddInstruction(goto_low_left); @@ -2030,7 +2030,7 @@ TEST_F(LoadStoreAnalysisTest, PartialPhiPropagation1) { HInstruction* goto_left_merge = new (GetAllocator()) HGoto(); left_phi->SetRawInputAt(0, obj_param); left_phi->SetRawInputAt(1, new_inst); - call_left->AsInvoke()->SetRawInputAt(0, left_phi); + call_left->SetRawInputAt(0, left_phi); left_merge->AddPhi(left_phi); left_merge->AddInstruction(call_left); left_merge->AddInstruction(goto_left_merge); diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 9cabb12a9f..58fdd1cd05 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -1080,10 +1080,12 @@ class LSEVisitor final : private HGraphDelegateVisitor { } void VisitVecLoad(HVecLoad* instruction) override { + DCHECK(!instruction->IsPredicated()); VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction)); } void VisitVecStore(HVecStore* instruction) override { + DCHECK(!instruction->IsPredicated()); size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction); VisitSetLocation(instruction, idx, instruction->GetValue()); } @@ -4041,6 +4043,13 @@ bool LoadStoreElimination::Run(bool enable_partial_lse) { return false; } + // Currently load_store analysis can't handle predicated load/stores; specifically pairs of + // memory operations with different predicates. + // TODO: support predicated SIMD. 
+ if (graph_->HasPredicatedSIMD()) { + return false; + } + std::unique_ptr<LSEVisitorWrapper> lse_visitor(new (&allocator) LSEVisitorWrapper( graph_, heap_location_collector, enable_partial_lse, stats_)); lse_visitor->Run(); diff --git a/compiler/optimizing/load_store_elimination_test.cc b/compiler/optimizing/load_store_elimination_test.cc index 1ee109980f..d3cf8bfa2a 100644 --- a/compiler/optimizing/load_store_elimination_test.cc +++ b/compiler/optimizing/load_store_elimination_test.cc @@ -573,7 +573,8 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue2) { AddVecStore(entry_block_, array_, j_); HInstruction* vstore = AddVecStore(entry_block_, array_, i_); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore)); @@ -589,7 +590,8 @@ TEST_F(LoadStoreEliminationTest, SameHeapValue3) { AddVecStore(entry_block_, array_, i_add1_); HInstruction* vstore = AddVecStore(entry_block_, array_, i_); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore)); @@ -634,7 +636,8 @@ TEST_F(LoadStoreEliminationTest, OverlappingLoadStore) { AddArraySet(entry_block_, array_, i_, c1); HInstruction* vload5 = AddVecLoad(entry_block_, array_, i_); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(load1)); @@ -668,7 +671,8 @@ TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithoutSideEffects) { // a[j] = 1; HInstruction* array_set = AddArraySet(return_block_, array_, j_, c1); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(array_set)); @@ -701,12 +705,13 @@ TEST_F(LoadStoreEliminationTest, StoreAfterSIMDLoopWithSideEffects) { // b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3]; AddVecStore(loop_, array_, phi_); HInstruction* vload = AddVecLoad(loop_, array_, phi_); - AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); + AddVecStore(loop_, array_b, phi_, vload); // a[j] = 0; HInstruction* a_set = AddArraySet(return_block_, array_, j_, c0); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vload)); @@ -740,12 +745,13 @@ TEST_F(LoadStoreEliminationTest, LoadAfterSIMDLoopWithSideEffects) { // b[phi,phi+1,phi+2,phi+3] = a[phi,phi+1,phi+2,phi+3]; AddVecStore(loop_, array_, phi_); HInstruction* vload = AddVecLoad(loop_, array_, phi_); - AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); + AddVecStore(loop_, array_b, phi_, vload); // x = a[j]; HInstruction* load = AddArrayGet(return_block_, array_, j_); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vload)); @@ -786,7 +792,8 @@ TEST_F(LoadStoreEliminationTest, MergePredecessorVecStores) { // down: a[i,... i + 3] = [1,...1] HInstruction* vstore4 = AddVecStore(down, array_, i_, vdata); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_TRUE(IsRemoved(vstore2)); @@ -874,10 +881,11 @@ TEST_F(LoadStoreEliminationTest, RedundantVStoreVLoadInLoop) { // a[i,... 
i + 3] = [1,...1] HInstruction* vstore1 = AddVecStore(loop_, array_a, phi_); HInstruction* vload = AddVecLoad(loop_, array_a, phi_); - HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload->AsVecLoad()); + HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload); HInstruction* vstore3 = AddVecStore(loop_, array_a, phi_, vstore1->InputAt(2)); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vstore1)); @@ -963,9 +971,10 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueInLoopWithoutWriteSideEffects) // v = a[i,... i + 3] // array[0,... 3] = v HInstruction* vload = AddVecLoad(loop_, array_a, phi_); - HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -987,9 +996,10 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValue) { // v = a[0,... 3] // array[0,... 3] = v HInstruction* vload = AddVecLoad(pre_header_, array_a, c0); - HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1063,10 +1073,11 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValueInLoopWithoutWriteSideE // array[0] = v1 HInstruction* vload = AddVecLoad(loop_, array_a, phi_); HInstruction* load = AddArrayGet(loop_, array_a, phi_); - HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload); HInstruction* store = AddArraySet(return_block_, array_, c0, load); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1094,10 +1105,11 @@ TEST_F(LoadStoreEliminationTest, VLoadAndLoadDefaultValue) { // array[0] = v1 HInstruction* vload = AddVecLoad(pre_header_, array_a, c0); HInstruction* load = AddArrayGet(pre_header_, array_a, c0); - HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload->AsVecLoad()); + HInstruction* vstore = AddVecStore(return_block_, array_, c0, vload); HInstruction* store = AddArraySet(return_block_, array_, c0, load); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload)); @@ -1126,10 +1138,11 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoadInLoopWithoutWriteSide // array[128,... 131] = v1 HInstruction* vload1 = AddVecLoad(loop_, array_a, phi_); HInstruction* vload2 = AddVecLoad(loop_, array_a, phi_); - HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad()); - HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad()); + HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1); + HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. 
+ graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload1)); @@ -1157,10 +1170,11 @@ TEST_F(LoadStoreEliminationTest, VLoadDefaultValueAndVLoad) { // array[128,... 131] = v1 HInstruction* vload1 = AddVecLoad(pre_header_, array_a, c0); HInstruction* vload2 = AddVecLoad(pre_header_, array_a, c0); - HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1->AsVecLoad()); - HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2->AsVecLoad()); + HInstruction* vstore1 = AddVecStore(return_block_, array_, c0, vload1); + HInstruction* vstore2 = AddVecStore(return_block_, array_, c128, vload2); - graph_->SetHasSIMD(true); + // TODO: enable LSE for graphs with predicated SIMD. + graph_->SetHasTraditionalSIMD(true); PerformLSE(); ASSERT_FALSE(IsRemoved(vload1)); @@ -2139,9 +2153,9 @@ TEST_F(LoadStoreEliminationTest, PartialLoadElimination) { right->AddInstruction(read_right); right->AddInstruction(goto_right); - HInstruction* phi_final = MakePhi({read_left, read_right}); + HPhi* phi_final = MakePhi({read_left, read_right}); HInstruction* return_exit = new (GetAllocator()) HReturn(phi_final); - exit->AddPhi(phi_final->AsPhi()); + exit->AddPhi(phi_final); exit->AddInstruction(return_exit); // PerformLSE expects this to be empty. @@ -5153,7 +5167,7 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonAfterCohort) { CheckFinalInstruction(if_merge->InputAt(0), ComparisonPlacement::kAfterEscape); EXPECT_INS_EQ(init_set->InputAt(1), c3); ASSERT_TRUE(write_partial->InputAt(0)->IsPhi()); - EXPECT_INS_EQ(write_partial->InputAt(0)->AsPhi()->InputAt(0), init_set->InputAt(0)); + EXPECT_INS_EQ(write_partial->InputAt(0)->InputAt(0), init_set->InputAt(0)); EXPECT_INS_EQ(write_partial->InputAt(1), c4); EXPECT_INS_EQ(pred_get->GetTarget(), merge_alloc); EXPECT_INS_EQ(pred_get->GetDefaultValue(), merge_value_return); @@ -5225,14 +5239,14 @@ TEST_P(PartialComparisonTestGroup, PartialComparisonInCohortAfterEscape) { HInstruction* call_left = MakeInvoke(DataType::Type::kVoid, { new_inst }); ComparisonInstructions cmp_instructions = GetComparisonInstructions(new_inst); - HInstruction* if_left = new (GetAllocator()) HIf(cmp_instructions.cmp_); + HIf* if_left = new (GetAllocator()) HIf(cmp_instructions.cmp_); left->AddInstruction(call_left); cmp_instructions.AddSetup(left); left->AddInstruction(cmp_instructions.cmp_); left->AddInstruction(if_left); call_left->CopyEnvironmentFrom(cls->GetEnvironment()); cmp_instructions.AddEnvironment(cls->GetEnvironment()); - if (if_left->AsIf()->IfTrueSuccessor() != partial) { + if (if_left->IfTrueSuccessor() != partial) { left->SwapSuccessors(); } @@ -5381,7 +5395,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore1) { right->AddInstruction(write_right); right->AddInstruction(goto_right); - HInstruction* write_bottom = MakeIFieldSet(new_inst, c3, MemberOffset(32)); + HInstanceFieldSet* write_bottom = MakeIFieldSet(new_inst, c3, MemberOffset(32)); HInstruction* return_exit = new (GetAllocator()) HReturnVoid(); breturn->AddInstruction(write_bottom); breturn->AddInstruction(return_exit); @@ -5391,7 +5405,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore1) { PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(write_bottom); - EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()); + EXPECT_TRUE(write_bottom->GetIsPredicatedSet()); EXPECT_INS_REMOVED(write_right); EXPECT_INS_RETAINED(call_left); HPhi* merge_alloc = FindSingleInstruction<HPhi>(graph_, breturn); @@ -5491,7 +5505,7 @@ TEST_F(LoadStoreEliminationTest, 
PredicatedStore2) { non_escape->AddInstruction(non_escape_goto); non_escape_call->CopyEnvironmentFrom(cls->GetEnvironment()); - HInstruction* write_bottom = MakeIFieldSet(new_inst, c4, MemberOffset(32)); + HInstanceFieldSet* write_bottom = MakeIFieldSet(new_inst, c4, MemberOffset(32)); HInstruction* return_exit = new (GetAllocator()) HReturnVoid(); breturn->AddInstruction(write_bottom); breturn->AddInstruction(return_exit); @@ -5501,7 +5515,7 @@ TEST_F(LoadStoreEliminationTest, PredicatedStore2) { PerformLSEWithPartial(blks); EXPECT_INS_RETAINED(write_bottom); - EXPECT_TRUE(write_bottom->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_bottom; + EXPECT_TRUE(write_bottom->GetIsPredicatedSet()) << *write_bottom; EXPECT_INS_REMOVED(write_right); EXPECT_INS_RETAINED(call_left); HInstanceFieldSet* pred_set = FindSingleInstruction<HInstanceFieldSet>(graph_, breturn); @@ -6786,14 +6800,14 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis1) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); EXPECT_INS_REMOVED(read_bottom) << *read_bottom; ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); + HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull(); ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); EXPECT_INS_EQ(inst_return_phi->InputAt(0), FindSingleInstruction<HNewInstance>(graph_, case1->GetSinglePredecessor())); EXPECT_INS_EQ(inst_return_phi->InputAt(1), FindSingleInstruction<HNewInstance>(graph_, case2->GetSinglePredecessor())); EXPECT_INS_EQ(inst_return_phi->InputAt(2), graph_->GetNullConstant()); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); + HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull(); ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); EXPECT_INS_EQ(inst_value_phi->InputAt(0), graph_->GetIntConstant(0)); EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0)); @@ -6966,14 +6980,14 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis2) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); EXPECT_INS_REMOVED(read_bottom) << *read_bottom; ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); + HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull(); ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); EXPECT_INS_EQ(inst_return_phi->InputAt(0), FindSingleInstruction<HNewInstance>(graph_, case1->GetSinglePredecessor())); EXPECT_INS_EQ(inst_return_phi->InputAt(1), FindSingleInstruction<HNewInstance>(graph_, case2->GetSinglePredecessor())); EXPECT_INS_EQ(inst_return_phi->InputAt(2), graph_->GetNullConstant()); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); + HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull(); ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); EXPECT_INS_EQ(inst_value_phi->InputAt(0), graph_->GetIntConstant(0)); EXPECT_INS_EQ(inst_value_phi->InputAt(1), graph_->GetIntConstant(0)); @@ -7113,12 +7127,12 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis3) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); EXPECT_INS_REMOVED(read_bottom) << *read_bottom; ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); + HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull(); ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); 
EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant()); EXPECT_INS_EQ(inst_return_phi->InputAt(1), FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor())); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); + HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull(); ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header); HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge); @@ -7213,7 +7227,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) { HInstruction* goto_no_escape = new (GetAllocator()) HGoto(); no_escape->AddInstruction(goto_no_escape); - HInstruction* write_pre_header = MakeIFieldSet(new_inst, c3, MemberOffset(32)); + HInstanceFieldSet* write_pre_header = MakeIFieldSet(new_inst, c3, MemberOffset(32)); HInstruction* goto_preheader = new (GetAllocator()) HGoto(); loop_pre_header->AddInstruction(write_pre_header); loop_pre_header->AddInstruction(goto_preheader); @@ -7236,7 +7250,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) { HInstruction* goto_loop_left = new (GetAllocator()) HGoto(); loop_if_left->AddInstruction(goto_loop_left); - HInstruction* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32)); + HInstanceFieldSet* write_loop_right = MakeIFieldSet(new_inst, c5, MemberOffset(32)); HInstruction* goto_loop_right = new (GetAllocator()) HGoto(); loop_if_right->AddInstruction(write_loop_right); loop_if_right->AddInstruction(goto_loop_right); @@ -7257,12 +7271,12 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); EXPECT_INS_REMOVED(read_bottom) << *read_bottom; ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); + HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull(); ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant()); EXPECT_INS_EQ(inst_return_phi->InputAt(1), FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor())); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); + HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull(); ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header); HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge); @@ -7272,9 +7286,9 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis4) { EXPECT_INS_EQ(loop_merge_phi->InputAt(0), loop_header_phi); EXPECT_INS_EQ(loop_merge_phi->InputAt(1), c5); EXPECT_INS_RETAINED(write_loop_right) << *write_loop_right; - EXPECT_TRUE(write_loop_right->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_loop_right; + EXPECT_TRUE(write_loop_right->GetIsPredicatedSet()) << *write_loop_right; EXPECT_INS_RETAINED(write_pre_header) << *write_pre_header; - EXPECT_TRUE(write_pre_header->AsInstanceFieldSet()->GetIsPredicatedSet()) << *write_pre_header; + EXPECT_TRUE(write_pre_header->GetIsPredicatedSet()) << *write_pre_header; } // // ENTRY @@ -7401,12 +7415,12 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis5) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); EXPECT_INS_REMOVED(read_bottom) << *read_bottom; ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); + HPhi* inst_return_phi = 
pred_get->GetTarget()->AsPhiOrNull(); ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); EXPECT_INS_EQ(inst_return_phi->InputAt(0), graph_->GetNullConstant()); EXPECT_INS_EQ(inst_return_phi->InputAt(1), FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor())); - HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhi(); + HPhi* inst_value_phi = pred_get->GetDefaultValue()->AsPhiOrNull(); ASSERT_TRUE(inst_value_phi != nullptr) << pred_get->GetDefaultValue()->DumpWithArgs(); HPhi* loop_header_phi = FindSingleInstruction<HPhi>(graph_, loop_header); HPhi* loop_merge_phi = FindSingleInstruction<HPhi>(graph_, loop_merge); @@ -7562,7 +7576,7 @@ TEST_F(LoadStoreEliminationTest, PartialLoopPhis6) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); EXPECT_INS_REMOVED(read_bottom) << *read_bottom; ASSERT_TRUE(pred_get != nullptr); - HPhi* inst_return_phi = pred_get->GetTarget()->AsPhi(); + HPhi* inst_return_phi = pred_get->GetTarget()->AsPhiOrNull(); ASSERT_TRUE(inst_return_phi != nullptr) << pred_get->GetTarget()->DumpWithArgs(); EXPECT_INS_EQ(inst_return_phi->InputAt(0), FindSingleInstruction<HNewInstance>(graph_, escape->GetSinglePredecessor())); @@ -8268,13 +8282,13 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements1) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle); ASSERT_NE(replacement_middle_read, nullptr); ASSERT_TRUE(replacement_middle_read->GetTarget()->IsPhi()); - ASSERT_EQ(2u, replacement_middle_read->GetTarget()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst); - ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(1), cnull); + ASSERT_EQ(2u, replacement_middle_read->GetTarget()->InputCount()); + ASSERT_INS_EQ(replacement_middle_read->GetTarget()->InputAt(0), replacement_new_inst); + ASSERT_INS_EQ(replacement_middle_read->GetTarget()->InputAt(1), cnull); ASSERT_TRUE(replacement_middle_read->GetDefaultValue()->IsPhi()); - ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(0), c0); - ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(1), c11); + ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->InputCount()); + ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->InputAt(0), c0); + ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->InputAt(1), c11); EXPECT_INS_RETAINED(left2_write); ASSERT_TRUE(left2_write->GetIsPredicatedSet()); @@ -8285,9 +8299,9 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements1) { ASSERT_NE(replacement_breturn_read, nullptr); ASSERT_INS_EQ(replacement_breturn_read->GetTarget(), replacement_middle_read->GetTarget()); ASSERT_TRUE(replacement_breturn_read->GetDefaultValue()->IsPhi()); - ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(0), c33); - HInstruction* other_input = replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(1); + ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->InputCount()); + ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->InputAt(0), c33); + HInstruction* other_input = replacement_breturn_read->GetDefaultValue()->InputAt(1); ASSERT_NE(other_input->GetBlock(), nullptr) << GetParam(); ASSERT_INS_EQ(other_input, replacement_middle_read); } @@ -8423,13 
+8437,13 @@ TEST_P(UsesOrderDependentTestGroup, RecordPredicatedReplacements2) { FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle); ASSERT_NE(replacement_middle_read, nullptr); ASSERT_TRUE(replacement_middle_read->GetTarget()->IsPhi()); - ASSERT_EQ(2u, replacement_middle_read->GetTarget()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst); - ASSERT_INS_EQ(replacement_middle_read->GetTarget()->AsPhi()->InputAt(1), cnull); + ASSERT_EQ(2u, replacement_middle_read->GetTarget()->InputCount()); + ASSERT_INS_EQ(replacement_middle_read->GetTarget()->InputAt(0), replacement_new_inst); + ASSERT_INS_EQ(replacement_middle_read->GetTarget()->InputAt(1), cnull); ASSERT_TRUE(replacement_middle_read->GetDefaultValue()->IsPhi()); - ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(0), c0); - ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->AsPhi()->InputAt(1), c11); + ASSERT_EQ(2u, replacement_middle_read->GetDefaultValue()->InputCount()); + ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->InputAt(0), c0); + ASSERT_INS_EQ(replacement_middle_read->GetDefaultValue()->InputAt(1), c11); EXPECT_INS_RETAINED(left2_call); @@ -8627,13 +8641,13 @@ TEST_P(UsesOrderDependentTestGroupForThreeItems, RecordPredicatedReplacements3) FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, middle1); ASSERT_NE(replacement_middle1_read, nullptr); ASSERT_TRUE(replacement_middle1_read->GetTarget()->IsPhi()); - ASSERT_EQ(2u, replacement_middle1_read->GetTarget()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->AsPhi()->InputAt(0), replacement_new_inst); - ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->AsPhi()->InputAt(1), cnull); + ASSERT_EQ(2u, replacement_middle1_read->GetTarget()->InputCount()); + ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->InputAt(0), replacement_new_inst); + ASSERT_INS_EQ(replacement_middle1_read->GetTarget()->InputAt(1), cnull); ASSERT_TRUE(replacement_middle1_read->GetDefaultValue()->IsPhi()); - ASSERT_EQ(2u, replacement_middle1_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->AsPhi()->InputAt(0), c0); - ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->AsPhi()->InputAt(1), c11); + ASSERT_EQ(2u, replacement_middle1_read->GetDefaultValue()->InputCount()); + ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->InputAt(0), c0); + ASSERT_INS_EQ(replacement_middle1_read->GetDefaultValue()->InputAt(1), c11); EXPECT_INS_RETAINED(left2_call); @@ -8652,11 +8666,10 @@ TEST_P(UsesOrderDependentTestGroupForThreeItems, RecordPredicatedReplacements3) FindSingleInstruction<HPredicatedInstanceFieldGet>(graph_, breturn); ASSERT_NE(replacement_breturn_read, nullptr); ASSERT_INS_EQ(replacement_breturn_read->GetTarget(), replacement_middle1_read->GetTarget()); - ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->AsPhi()->InputCount()); - ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(0), - replacement_left3_read); - ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->AsPhi()->InputAt(1), - replacement_middle1_read); + ASSERT_TRUE(replacement_breturn_read->GetDefaultValue()->IsPhi()); + ASSERT_EQ(2u, replacement_breturn_read->GetDefaultValue()->InputCount()); + ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->InputAt(0), 
replacement_left3_read); + ASSERT_INS_EQ(replacement_breturn_read->GetDefaultValue()->InputAt(1), replacement_middle1_read); EXPECT_INS_RETAINED(breturn_add1); ASSERT_INS_EQ(breturn_add1->InputAt(0), replacement_middle1_read); ASSERT_INS_EQ(breturn_add1->InputAt(1), replacement_breturn_read); diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index f40b7f4f0c..4189bc4053 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -62,7 +62,7 @@ Location Location::RegisterOrConstant(HInstruction* instruction) { } Location Location::RegisterOrInt32Constant(HInstruction* instruction) { - HConstant* constant = instruction->AsConstant(); + HConstant* constant = instruction->AsConstantOrNull(); if (constant != nullptr) { int64_t value = CodeGenerator::GetInt64ValueOf(constant); if (IsInt<32>(value)) { @@ -73,7 +73,7 @@ Location Location::RegisterOrInt32Constant(HInstruction* instruction) { } Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) { - HConstant* constant = instruction->AsConstant(); + HConstant* constant = instruction->AsConstantOrNull(); if (constant != nullptr) { int64_t value = CodeGenerator::GetInt64ValueOf(constant); if (IsInt<32>(value)) { diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc index 95e81533da..6163624a97 100644 --- a/compiler/optimizing/loop_analysis.cc +++ b/compiler/optimizing/loop_analysis.cc @@ -42,7 +42,7 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info, // not cause loop peeling to happen as they either cannot be inside a loop, or by // definition cannot be loop exits (unconditional instructions), or are not beneficial for // the optimization. - HIf* hif = block->GetLastInstruction()->AsIf(); + HIf* hif = block->GetLastInstruction()->AsIfOrNull(); if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) { analysis_results->invariant_exits_num_++; } @@ -259,7 +259,7 @@ class X86_64LoopHelper : public ArchDefaultLoopHelper { case HInstruction::InstructionKind::kVecReplicateScalar: return 2; case HInstruction::InstructionKind::kVecExtractScalar: - return 1; + return 1; case HInstruction::InstructionKind::kVecReduce: return 4; case HInstruction::InstructionKind::kVecNeg: diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h index cec00fecf4..cd8f00588d 100644 --- a/compiler/optimizing/loop_analysis.h +++ b/compiler/optimizing/loop_analysis.h @@ -148,13 +148,15 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> { // // Returns 'true' by default, should be overridden by particular target loop helper. virtual bool IsLoopNonBeneficialForScalarOpts( - LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; } + [[maybe_unused]] LoopAnalysisInfo* loop_analysis_info) const { + return true; + } // Returns optimal scalar unrolling factor for the loop. // // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. virtual uint32_t GetScalarUnrollingFactor( - const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const { + [[maybe_unused]] const LoopAnalysisInfo* analysis_info) const { return LoopAnalysisInfo::kNoUnrollingFactor; } @@ -166,17 +168,17 @@ class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> { // Returns whether it is beneficial to fully unroll the loop. // // Returns 'false' by default, should be overridden by particular target loop helper. 
- virtual bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const { + virtual bool IsFullUnrollingBeneficial([[maybe_unused]] LoopAnalysisInfo* analysis_info) const { return false; } // Returns optimal SIMD unrolling factor for the loop. // // Returns kNoUnrollingFactor by default, should be overridden by particular target loop helper. - virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED, - int64_t trip_count ATTRIBUTE_UNUSED, - uint32_t max_peel ATTRIBUTE_UNUSED, - uint32_t vector_length ATTRIBUTE_UNUSED) const { + virtual uint32_t GetSIMDUnrollingFactor([[maybe_unused]] HBasicBlock* block, + [[maybe_unused]] int64_t trip_count, + [[maybe_unused]] uint32_t max_peel, + [[maybe_unused]] uint32_t vector_length) const { return LoopAnalysisInfo::kNoUnrollingFactor; } diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 7a52502562..f6d69ca789 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -366,8 +366,8 @@ static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) { return (restrictions & tested) != 0; } -// Insert an instruction. -static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { +// Insert an instruction at the end of the block, with safe checks. +inline HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { DCHECK(block != nullptr); DCHECK(instruction != nullptr); block->InsertInstructionBefore(instruction, block->GetLastInstruction()); @@ -418,7 +418,7 @@ static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) { ++it; if (true_succ->Dominates(user_block)) { user->ReplaceInput(graph->GetIntConstant(1), index); - } else if (false_succ->Dominates(user_block)) { + } else if (false_succ->Dominates(user_block)) { user->ReplaceInput(graph->GetIntConstant(0), index); } } @@ -453,6 +453,54 @@ static DataType::Type GetNarrowerType(HInstruction* a, HInstruction* b) { return type; } +// Returns whether the loop is of a diamond structure: +// +// header <----------------+ +// | | +// diamond_hif | +// / \ | +// diamond_true diamond_false | +// \ / | +// back_edge | +// | | +// +---------------------+ +static bool HasLoopDiamondStructure(HLoopInformation* loop_info) { + HBasicBlock* header = loop_info->GetHeader(); + if (loop_info->NumberOfBackEdges() != 1 || header->GetSuccessors().size() != 2) { + return false; + } + HBasicBlock* header_succ_0 = header->GetSuccessors()[0]; + HBasicBlock* header_succ_1 = header->GetSuccessors()[1]; + HBasicBlock* diamond_top = loop_info->Contains(*header_succ_0) ? 
+ header_succ_0 : + header_succ_1; + if (!diamond_top->GetLastInstruction()->IsIf()) { + return false; + } + + HIf* diamond_hif = diamond_top->GetLastInstruction()->AsIf(); + HBasicBlock* diamond_true = diamond_hif->IfTrueSuccessor(); + HBasicBlock* diamond_false = diamond_hif->IfFalseSuccessor(); + + if (diamond_true->GetSuccessors().size() != 1 || diamond_false->GetSuccessors().size() != 1) { + return false; + } + + HBasicBlock* back_edge = diamond_true->GetSingleSuccessor(); + if (back_edge != diamond_false->GetSingleSuccessor() || + back_edge != loop_info->GetBackEdges()[0]) { + return false; + } + + DCHECK_EQ(loop_info->GetBlocks().NumSetBits(), 5u); + return true; +} + +static bool IsPredicatedLoopControlFlowSupported(HLoopInformation* loop_info) { + size_t num_of_blocks = loop_info->GetBlocks().NumSetBits(); + return num_of_blocks == 2 || HasLoopDiamondStructure(loop_info); +} + // // Public methods. // @@ -482,6 +530,8 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, vector_runtime_test_b_(nullptr), vector_map_(nullptr), vector_permanent_map_(nullptr), + vector_external_set_(nullptr), + predicate_info_map_(nullptr), vector_mode_(kSequential), vector_preheader_(nullptr), vector_header_(nullptr), @@ -542,12 +592,17 @@ bool HLoopOptimization::LocalRun() { std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); ScopedArenaSafeMap<HInstruction*, HInstruction*> perm( std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ScopedArenaSet<HInstruction*> ext_set(loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ScopedArenaSafeMap<HBasicBlock*, BlockPredicateInfo*> pred( + std::less<HBasicBlock*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); // Attach. iset_ = &iset; reductions_ = &reds; vector_refs_ = &refs; vector_map_ = ↦ vector_permanent_map_ = &perm; + vector_external_set_ = &ext_set; + predicate_info_map_ = &pred; // Traverse. const bool did_loop_opt = TraverseLoopsInnerToOuter(top_loop_); // Detach. @@ -556,6 +611,9 @@ bool HLoopOptimization::LocalRun() { vector_refs_ = nullptr; vector_map_ = nullptr; vector_permanent_map_ = nullptr; + vector_external_set_ = nullptr; + predicate_info_map_ = nullptr; + return did_loop_opt; } @@ -787,6 +845,37 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) { } } +// Checks whether the loop has exit structure suitable for InnerLoopFinite optimization: +// - has single loop exit. +// - the exit block has only single predecessor - a block inside the loop. +// +// In that case returns single exit basic block (outside the loop); otherwise nullptr. +static HBasicBlock* GetInnerLoopFiniteSingleExit(HLoopInformation* loop_info) { + HBasicBlock* exit = nullptr; + for (HBlocksInLoopIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* block = block_it.Current(); + + // Check whether one of the successor is loop exit. + for (HBasicBlock* successor : block->GetSuccessors()) { + if (!loop_info->Contains(*successor)) { + if (exit != nullptr) { + // The loop has more than one exit. + return nullptr; + } + exit = successor; + + // Ensure exit can only be reached by exiting loop. 
+ if (successor->GetPredecessors().size() != 1) { + return nullptr; + } + } + } + } + return exit; +} + bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -795,33 +884,22 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { if (!induction_range_.IsFinite(node->loop_info, &trip_count)) { return false; } - // Ensure there is only a single loop-body (besides the header). - HBasicBlock* body = nullptr; - for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { - if (it.Current() != header) { - if (body != nullptr) { - return false; - } - body = it.Current(); - } - } - CHECK(body != nullptr); - // Ensure there is only a single exit point. - if (header->GetSuccessors().size() != 2) { - return false; - } - HBasicBlock* exit = (header->GetSuccessors()[0] == body) - ? header->GetSuccessors()[1] - : header->GetSuccessors()[0]; - // Ensure exit can only be reached by exiting loop. - if (exit->GetPredecessors().size() != 1) { + // Check loop exits. + HBasicBlock* exit = GetInnerLoopFiniteSingleExit(node->loop_info); + if (exit == nullptr) { return false; } + + HBasicBlock* body = (header->GetSuccessors()[0] == exit) + ? header->GetSuccessors()[1] + : header->GetSuccessors()[0]; // Detect either an empty loop (no side effects other than plain iteration) or // a trivial loop (just iterating once). Replace subsequent index uses, if any, // with the last value and remove the loop, possibly after unrolling its body. HPhi* main_phi = nullptr; - if (TrySetSimpleLoopHeader(header, &main_phi)) { + size_t num_of_blocks = header->GetLoopInformation()->GetBlocks().NumSetBits(); + + if (num_of_blocks == 2 && TrySetSimpleLoopHeader(header, &main_phi)) { bool is_empty = IsEmptyBody(body); if (reductions_->empty() && // TODO: possible with some effort (is_empty || trip_count == 1) && @@ -845,21 +923,61 @@ bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) { } } // Vectorize loop, if possible and valid. - if (kEnableVectorization && + if (!kEnableVectorization || // Disable vectorization for debuggable graphs: this is a workaround for the bug // in 'GenerateNewLoop' which caused the SuspendCheck environment to be invalid. // TODO: b/138601207, investigate other possible cases with wrong environment values and // possibly switch back vectorization on for debuggable graphs. - !graph_->IsDebuggable() && - TrySetSimpleLoopHeader(header, &main_phi) && - ShouldVectorize(node, body, trip_count) && - TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) { - Vectorize(node, body, exit, trip_count); - graph_->SetHasSIMD(true); // flag SIMD usage - MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized); - return true; + graph_->IsDebuggable()) { + return false; + } + + if (IsInPredicatedVectorizationMode()) { + return TryVectorizePredicated(node, body, exit, main_phi, trip_count); + } else { + return TryVectorizedTraditional(node, body, exit, main_phi, trip_count); } - return false; +} + +bool HLoopOptimization::TryVectorizePredicated(LoopNode* node, + HBasicBlock* body, + HBasicBlock* exit, + HPhi* main_phi, + int64_t trip_count) { + if (!IsPredicatedLoopControlFlowSupported(node->loop_info) || + !ShouldVectorizeCommon(node, main_phi, trip_count)) { + return false; + } + + // Currently we can only generate cleanup loops for loops with 2 basic block. 
+ // + // TODO: Support array disambiguation tests for CF loops. + if (NeedsArrayRefsDisambiguationTest() && + node->loop_info->GetBlocks().NumSetBits() != 2) { + return false; + } + + VectorizePredicated(node, body, exit); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized); + graph_->SetHasPredicatedSIMD(true); // flag SIMD usage + return true; +} + +bool HLoopOptimization::TryVectorizedTraditional(LoopNode* node, + HBasicBlock* body, + HBasicBlock* exit, + HPhi* main_phi, + int64_t trip_count) { + HBasicBlock* header = node->loop_info->GetHeader(); + size_t num_of_blocks = header->GetLoopInformation()->GetBlocks().NumSetBits(); + + if (num_of_blocks != 2 || !ShouldVectorizeCommon(node, main_phi, trip_count)) { + return false; + } + VectorizeTraditional(node, body, exit, trip_count); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized); + graph_->SetHasTraditionalSIMD(true); // flag SIMD usage + return true; } bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { @@ -1006,7 +1124,10 @@ bool HLoopOptimization::TryLoopScalarOpts(LoopNode* node) { // Intel Press, June, 2004 (http://www.aartbik.com/). // -bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count) { + +bool HLoopOptimization::CanVectorizeDataFlow(LoopNode* node, + HBasicBlock* header, + bool collect_alignment_info) { // Reset vector bookkeeping. vector_length_ = 0; vector_refs_->clear(); @@ -1015,16 +1136,30 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 vector_runtime_test_a_ = vector_runtime_test_b_ = nullptr; - // Phis in the loop-body prevent vectorization. - if (!block->GetPhis().IsEmpty()) { - return false; - } + // Traverse the data flow of the loop, in the original program order. + for (HBlocksInLoopReversePostOrderIterator block_it(*header->GetLoopInformation()); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* block = block_it.Current(); - // Scan the loop-body, starting a right-hand-side tree traversal at each left-hand-side - // occurrence, which allows passing down attributes down the use tree. - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) { - return false; // failure to vectorize a left-hand-side + if (block == header) { + // The header is of a certain structure (TrySetSimpleLoopHeader) and doesn't need to be + // processed here. + continue; + } + + // Phis in the loop-body prevent vectorization. + // TODO: Enable vectorization of CF loops with Phis. + if (!block->GetPhis().IsEmpty()) { + return false; + } + + // Scan the loop-body instructions, starting a right-hand-side tree traversal at each + // left-hand-side occurrence, which allows passing down attributes down the use tree. + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) { + return false; // failure to vectorize a left-hand-side + } } } @@ -1111,24 +1246,123 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 } } // for i - if (!IsInPredicatedVectorizationMode()) { - // Find a suitable alignment strategy. + if (collect_alignment_info) { + // Update the info on alignment strategy. SetAlignmentStrategy(peeling_votes, peeling_candidate); } - // Does vectorization seem profitable? - if (!IsVectorizationProfitable(trip_count)) { + // Success! 
+ return true; +} + +bool HLoopOptimization::ShouldVectorizeCommon(LoopNode* node, + HPhi* main_phi, + int64_t trip_count) { + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + + bool enable_alignment_strategies = !IsInPredicatedVectorizationMode(); + if (!TrySetSimpleLoopHeader(header, &main_phi) || + !CanVectorizeDataFlow(node, header, enable_alignment_strategies) || + !IsVectorizationProfitable(trip_count) || + !TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) { return false; } - // Success! return true; } -void HLoopOptimization::Vectorize(LoopNode* node, - HBasicBlock* block, - HBasicBlock* exit, - int64_t trip_count) { +void HLoopOptimization::VectorizePredicated(LoopNode* node, + HBasicBlock* block, + HBasicBlock* exit) { + DCHECK(IsInPredicatedVectorizationMode()); + + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + + // Adjust vector bookkeeping. + HPhi* main_phi = nullptr; + bool is_simple_loop_header = TrySetSimpleLoopHeader(header, &main_phi); // refills sets + DCHECK(is_simple_loop_header); + vector_header_ = header; + vector_body_ = block; + + // Loop induction type. + DataType::Type induc_type = main_phi->GetType(); + DCHECK(induc_type == DataType::Type::kInt32 || induc_type == DataType::Type::kInt64) + << induc_type; + + // Generate loop control: + // stc = <trip-count>; + // vtc = <vector trip-count> + HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader); + HInstruction* vtc = stc; + vector_index_ = graph_->GetConstant(induc_type, 0); + bool needs_disambiguation_test = false; + // Generate runtime disambiguation test: + // vtc = a != b ? vtc : 0; + if (NeedsArrayRefsDisambiguationTest()) { + HInstruction* rt = Insert( + preheader, + new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_)); + vtc = Insert(preheader, + new (global_allocator_) + HSelect(rt, vtc, graph_->GetConstant(induc_type, 0), kNoDexPc)); + needs_disambiguation_test = true; + } + + // Generate vector loop: + // for ( ; i < vtc; i += vector_length) + // <vectorized-loop-body> + HBasicBlock* preheader_for_vector_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + vector_mode_ = kVector; + GenerateNewLoopPredicated(node, + preheader_for_vector_loop, + vector_index_, + vtc, + graph_->GetConstant(induc_type, vector_length_)); + + // Generate scalar loop, if needed: + // for ( ; i < stc; i += 1) + // <loop-body> + if (needs_disambiguation_test) { + vector_mode_ = kSequential; + HBasicBlock* preheader_for_cleanup_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + // Use "Traditional" version for the sequential loop. + GenerateNewLoopScalarOrTraditional(node, + preheader_for_cleanup_loop, + vector_index_, + stc, + graph_->GetConstant(induc_type, 1), + LoopAnalysisInfo::kNoUnrollingFactor); + } + + FinalizeVectorization(node); + + // Assign governing predicates for the predicated instructions inserted during vectorization + // outside the loop. 
+ for (auto it : *vector_external_set_) { + DCHECK(it->IsVecOperation()); + HVecOperation* vec_op = it->AsVecOperation(); + + HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_, + graph_->GetIntConstant(1), + vec_op->GetPackedType(), + vec_op->GetVectorLength(), + 0u); + vec_op->GetBlock()->InsertInstructionBefore(set_pred, vec_op); + vec_op->SetMergingGoverningPredicate(set_pred); + } +} + +void HLoopOptimization::VectorizeTraditional(LoopNode* node, + HBasicBlock* block, + HBasicBlock* exit, + int64_t trip_count) { + DCHECK(!IsInPredicatedVectorizationMode()); + HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -1141,7 +1375,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, // A cleanup loop is needed, at least, for any unknown trip count or // for a known trip count with remainder iterations after vectorization. - bool needs_cleanup = !IsInPredicatedVectorizationMode() && + bool needs_cleanup = (trip_count == 0 || ((trip_count - vector_static_peeling_factor_) % chunk) != 0); // Adjust vector bookkeeping. @@ -1160,13 +1394,11 @@ void HLoopOptimization::Vectorize(LoopNode* node, // ptc = <peeling factor>; HInstruction* ptc = nullptr; if (vector_static_peeling_factor_ != 0) { - DCHECK(!IsInPredicatedVectorizationMode()); // Static loop peeling for SIMD alignment (using the most suitable // fixed peeling factor found during prior alignment analysis). DCHECK(vector_dynamic_peeling_candidate_ == nullptr); ptc = graph_->GetConstant(induc_type, vector_static_peeling_factor_); } else if (vector_dynamic_peeling_candidate_ != nullptr) { - DCHECK(!IsInPredicatedVectorizationMode()); // Dynamic loop peeling for SIMD alignment (using the most suitable // candidate found during prior alignment analysis): // rem = offset % ALIGN; // adjusted as #elements @@ -1197,7 +1429,6 @@ void HLoopOptimization::Vectorize(LoopNode* node, HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader); HInstruction* vtc = stc; if (needs_cleanup) { - DCHECK(!IsInPredicatedVectorizationMode()); DCHECK(IsPowerOfTwo(chunk)); HInstruction* diff = stc; if (ptc != nullptr) { @@ -1217,7 +1448,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, // Generate runtime disambiguation test: // vtc = a != b ? vtc : 0; - if (vector_runtime_test_a_ != nullptr) { + if (NeedsArrayRefsDisambiguationTest()) { HInstruction* rt = Insert( preheader, new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_)); @@ -1235,45 +1466,52 @@ void HLoopOptimization::Vectorize(LoopNode* node, // moved around during suspend checks, since all analysis was based on // nothing more than the Android runtime alignment conventions. 
if (ptc != nullptr) { - DCHECK(!IsInPredicatedVectorizationMode()); vector_mode_ = kSequential; - GenerateNewLoop(node, - block, - graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), - vector_index_, - ptc, - graph_->GetConstant(induc_type, 1), - LoopAnalysisInfo::kNoUnrollingFactor); + HBasicBlock* preheader_for_peeling_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + GenerateNewLoopScalarOrTraditional(node, + preheader_for_peeling_loop, + vector_index_, + ptc, + graph_->GetConstant(induc_type, 1), + LoopAnalysisInfo::kNoUnrollingFactor); } // Generate vector loop, possibly further unrolled: // for ( ; i < vtc; i += chunk) // <vectorized-loop-body> vector_mode_ = kVector; - GenerateNewLoop(node, - block, - graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), - vector_index_, - vtc, - graph_->GetConstant(induc_type, vector_length_), // increment per unroll - unroll); - HLoopInformation* vloop = vector_header_->GetLoopInformation(); + HBasicBlock* preheader_for_vector_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + GenerateNewLoopScalarOrTraditional(node, + preheader_for_vector_loop, + vector_index_, + vtc, + graph_->GetConstant(induc_type, vector_length_), // per unroll + unroll); // Generate cleanup loop, if needed: // for ( ; i < stc; i += 1) // <loop-body> if (needs_cleanup) { - DCHECK_IMPLIES(IsInPredicatedVectorizationMode(), vector_runtime_test_a_ != nullptr); vector_mode_ = kSequential; - GenerateNewLoop(node, - block, - graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), - vector_index_, - stc, - graph_->GetConstant(induc_type, 1), - LoopAnalysisInfo::kNoUnrollingFactor); + HBasicBlock* preheader_for_cleanup_loop = + graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit); + GenerateNewLoopScalarOrTraditional(node, + preheader_for_cleanup_loop, + vector_index_, + stc, + graph_->GetConstant(induc_type, 1), + LoopAnalysisInfo::kNoUnrollingFactor); } + FinalizeVectorization(node); +} + +void HLoopOptimization::FinalizeVectorization(LoopNode* node) { + HBasicBlock* header = node->loop_info->GetHeader(); + HBasicBlock* preheader = node->loop_info->GetPreHeader(); + HLoopInformation* vloop = vector_header_->GetLoopInformation(); // Link reductions to their final uses. for (auto i = reductions_->begin(); i != reductions_->end(); ++i) { if (i->first->IsPhi()) { @@ -1287,9 +1525,17 @@ void HLoopOptimization::Vectorize(LoopNode* node, } } - // Remove the original loop by disconnecting the body block - // and removing all instructions from the header. - block->DisconnectAndDelete(); + // Remove the original loop. 
+ for (HBlocksInLoopPostOrderIterator it_loop(*node->loop_info); + !it_loop.Done(); + it_loop.Advance()) { + HBasicBlock* cur_block = it_loop.Current(); + if (cur_block == node->loop_info->GetHeader()) { + continue; + } + cur_block->DisconnectAndDelete(); + } + while (!header->GetFirstInstruction()->IsGoto()) { header->RemoveInstruction(header->GetFirstInstruction()); } @@ -1301,14 +1547,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, node->loop_info = vloop; } -void HLoopOptimization::GenerateNewLoop(LoopNode* node, - HBasicBlock* block, - HBasicBlock* new_preheader, - HInstruction* lo, - HInstruction* hi, - HInstruction* step, - uint32_t unroll) { - DCHECK(unroll == 1 || vector_mode_ == kVector); +HPhi* HLoopOptimization::InitializeForNewLoop(HBasicBlock* new_preheader, HInstruction* lo) { DataType::Type induc_type = lo->GetType(); // Prepare new loop. vector_preheader_ = new_preheader, @@ -1318,68 +1557,160 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node, kNoRegNumber, 0, HPhi::ToPhiType(induc_type)); - // Generate header and prepare body. - // for (i = lo; i < hi; i += step) - // <loop-body> - HInstruction* cond = nullptr; - HInstruction* set_pred = nullptr; - if (IsInPredicatedVectorizationMode()) { - HVecPredWhile* pred_while = - new (global_allocator_) HVecPredWhile(global_allocator_, - phi, - hi, - HVecPredWhile::CondKind::kLO, - DataType::Type::kInt32, - vector_length_, - 0u); - - cond = new (global_allocator_) HVecPredCondition(global_allocator_, - pred_while, - HVecPredCondition::PCondKind::kNFirst, - DataType::Type::kInt32, - vector_length_, - 0u); - - vector_header_->AddPhi(phi); - vector_header_->AddInstruction(pred_while); - vector_header_->AddInstruction(cond); - set_pred = pred_while; - } else { - cond = new (global_allocator_) HAboveOrEqual(phi, hi); - vector_header_->AddPhi(phi); - vector_header_->AddInstruction(cond); - } + vector_header_->AddPhi(phi); + vector_index_ = phi; + vector_permanent_map_->clear(); + vector_external_set_->clear(); + predicate_info_map_->clear(); + + return phi; +} +void HLoopOptimization::GenerateNewLoopScalarOrTraditional(LoopNode* node, + HBasicBlock* new_preheader, + HInstruction* lo, + HInstruction* hi, + HInstruction* step, + uint32_t unroll) { + DCHECK(unroll == 1 || vector_mode_ == kVector); + DataType::Type induc_type = lo->GetType(); + HPhi* phi = InitializeForNewLoop(new_preheader, lo); + + // Generate loop exit check. + HInstruction* cond = new (global_allocator_) HAboveOrEqual(phi, hi); + vector_header_->AddInstruction(cond); vector_header_->AddInstruction(new (global_allocator_) HIf(cond)); - vector_index_ = phi; - vector_permanent_map_->clear(); // preserved over unrolling + for (uint32_t u = 0; u < unroll; u++) { - // Generate instruction map. - vector_map_->clear(); - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + GenerateNewLoopBodyOnce(node, induc_type, step); + } + + FinalizePhisForNewLoop(phi, lo); +} + +void HLoopOptimization::GenerateNewLoopPredicated(LoopNode* node, + HBasicBlock* new_preheader, + HInstruction* lo, + HInstruction* hi, + HInstruction* step) { + DCHECK(IsInPredicatedVectorizationMode()); + DCHECK_EQ(vector_mode_, kVector); + DataType::Type induc_type = lo->GetType(); + HPhi* phi = InitializeForNewLoop(new_preheader, lo); + + // Generate loop exit check. 
+ HVecPredWhile* pred_while = + new (global_allocator_) HVecPredWhile(global_allocator_, + phi, + hi, + HVecPredWhile::CondKind::kLO, + DataType::Type::kInt32, + vector_length_, + 0u); + + HInstruction* cond = + new (global_allocator_) HVecPredToBoolean(global_allocator_, + pred_while, + HVecPredToBoolean::PCondKind::kNFirst, + DataType::Type::kInt32, + vector_length_, + 0u); + + vector_header_->AddInstruction(pred_while); + vector_header_->AddInstruction(cond); + vector_header_->AddInstruction(new (global_allocator_) HIf(cond)); + + PreparePredicateInfoMap(node); + GenerateNewLoopBodyOnce(node, induc_type, step); + InitPredicateInfoMap(node, pred_while); + + // Assign governing predicates for instructions in the loop; the traversal order doesn't matter. + for (HBlocksInLoopIterator block_it(*node->loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* cur_block = block_it.Current(); + + for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) { + auto i = vector_map_->find(it.Current()); + if (i != vector_map_->end()) { + HInstruction* instr = i->second; + + if (!instr->IsVecOperation()) { + continue; + } + // There are cases when a vector instruction, which corresponds to some instruction in the + // original scalar loop, is located not in the newly created vector loop but + // in the vector loop preheader (and hence recorded in vector_external_set_). + // + // Governing predicates will be set for such instructions separately. + bool in_vector_loop = vector_header_->GetLoopInformation()->Contains(*instr->GetBlock()); + DCHECK_IMPLIES(!in_vector_loop, + vector_external_set_->find(instr) != vector_external_set_->end()); + + if (in_vector_loop && + !instr->AsVecOperation()->IsPredicated()) { + HVecOperation* op = instr->AsVecOperation(); + HVecPredSetOperation* pred = predicate_info_map_->Get(cur_block)->GetControlPredicate(); + op->SetMergingGoverningPredicate(pred); + } + } + } + } + + FinalizePhisForNewLoop(phi, lo); +} + +void HLoopOptimization::GenerateNewLoopBodyOnce(LoopNode* node, + DataType::Type induc_type, + HInstruction* step) { + // Generate instruction map. + vector_map_->clear(); + HLoopInformation* loop_info = node->loop_info; + + // Traverse the data flow of the loop, in the original program order. + for (HBlocksInLoopReversePostOrderIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* cur_block = block_it.Current(); + + if (cur_block == loop_info->GetHeader()) { + continue; + } + + for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) { bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true); DCHECK(vectorized_def); } - // Generate body from the instruction map, but in original program order. - HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment(); - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + } + + // Generate body from the instruction map, in the original program order. 
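In the predicated variant there is no scalar cleanup loop: an SVE-style while-predicate decides per lane whether it is active, and the loop exits once the first lane goes inactive (HVecPredToBoolean kNFirst). The following is a standalone model of that behaviour in plain C++, not the ART API.

  #include <cstdint>
  #include <vector>

  void predicated_shape(int32_t* a, uint32_t lo, uint32_t hi, uint32_t vl) {
    std::vector<bool> pred(vl);
    for (uint32_t i = lo; ; i += vl) {
      for (uint32_t l = 0; l < vl; ++l) {
        pred[l] = (i + l) < hi;            // HVecPredWhile, CondKind::kLO (unsigned lower)
      }
      if (!pred[0]) {                      // HVecPredToBoolean kNFirst: exit when lane 0 is off
        break;
      }
      for (uint32_t l = 0; l < vl; ++l) {
        if (pred[l]) a[i + l] += 1;        // each HVec op merges under its governing predicate
      }
    }
  }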
+ HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment(); + for (HBlocksInLoopReversePostOrderIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* cur_block = block_it.Current(); + + if (cur_block == loop_info->GetHeader()) { + continue; + } + + for (HInstructionIterator it(cur_block->GetInstructions()); !it.Done(); it.Advance()) { auto i = vector_map_->find(it.Current()); if (i != vector_map_->end() && !i->second->IsInBlock()) { Insert(vector_body_, i->second); - if (IsInPredicatedVectorizationMode() && i->second->IsVecOperation()) { - HVecOperation* op = i->second->AsVecOperation(); - op->SetMergingGoverningPredicate(set_pred); - } // Deal with instructions that need an environment, such as the scalar intrinsics. if (i->second->NeedsEnvironment()) { i->second->CopyEnvironmentFromWithLoopPhiAdjustment(env, vector_header_); } } } - // Generate the induction. - vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step); - Insert(vector_body_, vector_index_); } + // Generate the induction. + vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step); + Insert(vector_body_, vector_index_); +} + +void HLoopOptimization::FinalizePhisForNewLoop(HPhi* phi, HInstruction* lo) { // Finalize phi inputs for the reductions (if any). for (auto i = reductions_->begin(); i != reductions_->end(); ++i) { if (!i->first->IsPhi()) { @@ -1442,10 +1773,13 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) || (TrySetVectorType(type, &restrictions) && VectorizeUse(node, instruction, generate_code, type, restrictions))) { + DCHECK(!instruction->IsPhi()); if (generate_code) { - HInstruction* new_red = vector_map_->Get(instruction); - vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second)); - vector_permanent_map_->Overwrite(redit->second, new_red); + HInstruction* new_red_vec_op = vector_map_->Get(instruction); + HInstruction* original_phi = redit->second; + DCHECK(original_phi->IsPhi()); + vector_permanent_map_->Put(new_red_vec_op, vector_map_->Get(original_phi)); + vector_permanent_map_->Overwrite(original_phi, new_red_vec_op); } return true; } @@ -1455,6 +1789,10 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, if (instruction->IsGoto()) { return true; } + + if (instruction->IsIf()) { + return VectorizeIfCondition(node, instruction, generate_code, restrictions); + } // Otherwise accept only expressions with no effects outside the immediate loop-body. // Note that actual uses are inspected during right-hand-side tree traversal. return !IsUsedOutsideLoop(node->loop_info, instruction) @@ -1485,9 +1823,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, // Deal with vector restrictions. bool is_string_char_at = instruction->AsArrayGet()->IsStringCharAt(); - if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt) || - IsInPredicatedVectorizationMode())) { - // TODO: Support CharAt for predicated mode. + if (is_string_char_at && (HasVectorRestrictions(restrictions, kNoStringCharAt))) { return false; } // Accept a right-hand-side array base[index] for @@ -1676,6 +2012,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case InstructionSet::kThumb2: // Allow vectorization for all ARM devices, because Android assumes that // ARM 32-bit always supports advanced SIMD (64-bit SIMD). 
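The Put/Overwrite bookkeeping in VectorizeDef above is what chains a reduction accumulator through successive unrolled body copies before FinalizePhisForNewLoop wires it back into the loop phi. An illustrative scalar analogue follows; the names are hypothetical and only the chaining pattern is taken from the change.

  #include <cstdint>

  int32_t reduction_unrolled(const int32_t* a, uint32_t n) {  // assumes n % 2 == 0
    int32_t acc = 0;               // initial phi input (start value in vector_permanent_map_)
    for (uint32_t i = 0; i < n; ) {
      acc = acc + a[i]; i += 1;    // copy #1 reads the phi (or the previous copy's result)
      acc = acc + a[i]; i += 1;    // Overwrite() makes copy #2 consume copy #1's result
    }
    return acc;                    // FinalizePhisForNewLoop links acc back into the phi
  }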
+ *restrictions |= kNoIfCond; switch (type) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -1701,6 +2038,13 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict DCHECK_EQ(simd_register_size_ % DataType::Size(type), 0u); switch (type) { case DataType::Type::kBool: + *restrictions |= kNoDiv | + kNoSignedHAdd | + kNoUnsignedHAdd | + kNoUnroundedHAdd | + kNoSAD | + kNoIfCond; + return TrySetVectorLength(type, vector_length); case DataType::Type::kUint8: case DataType::Type::kInt8: *restrictions |= kNoDiv | @@ -1712,6 +2056,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kUint16: case DataType::Type::kInt16: *restrictions |= kNoDiv | + kNoStringCharAt | // TODO: support in predicated mode. kNoSignedHAdd | kNoUnsignedHAdd | kNoUnroundedHAdd | @@ -1722,13 +2067,13 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict *restrictions |= kNoDiv | kNoSAD; return TrySetVectorLength(type, vector_length); case DataType::Type::kInt64: - *restrictions |= kNoDiv | kNoSAD; + *restrictions |= kNoDiv | kNoSAD | kNoIfCond; return TrySetVectorLength(type, vector_length); case DataType::Type::kFloat32: - *restrictions |= kNoReduction; + *restrictions |= kNoReduction | kNoIfCond; return TrySetVectorLength(type, vector_length); case DataType::Type::kFloat64: - *restrictions |= kNoReduction; + *restrictions |= kNoReduction | kNoIfCond; return TrySetVectorLength(type, vector_length); default: break; @@ -1737,6 +2082,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict } else { // Allow vectorization for all ARM devices, because Android assumes that // ARMv8 AArch64 always supports advanced SIMD (128-bit SIMD). + *restrictions |= kNoIfCond; switch (type) { case DataType::Type::kBool: case DataType::Type::kUint8: @@ -1767,6 +2113,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case InstructionSet::kX86: case InstructionSet::kX86_64: // Allow vectorization for SSE4.1-enabled X86 devices only (128-bit SIMD). 
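TrySetVectorType communicates per-ISA limitations as bits in a uint64_t; kNoIfCond (added as 1 << 14 in the header) is set on every path that cannot lower an if-condition, and VectorizeIfCondition later bails out via HasVectorRestrictions. A minimal sketch of that bitmask pattern; only the kNoSAD and kNoIfCond values are taken from this change, the helper and main() are illustrative.

  #include <cstdint>

  enum : uint64_t {
    kNoSAD    = 1u << 11,  // value as declared in loop_optimization.h
    kNoIfCond = 1u << 14,  // new flag: no if-condition conversion
  };

  static bool HasVectorRestriction(uint64_t restrictions, uint64_t bit) {
    return (restrictions & bit) != 0;
  }

  int main() {
    uint64_t restrictions = 0;
    restrictions |= kNoIfCond;  // e.g. what the ARM32/NEON paths now set unconditionally
    // Returns 0 when the restriction is present, i.e. HIf conversion must be rejected.
    return HasVectorRestriction(restrictions, kNoIfCond) ? 0 : 1;
  }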
+ *restrictions |= kNoIfCond; if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) { switch (type) { case DataType::Type::kBool: @@ -1855,15 +2202,7 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, DataType::Type type) { vector = new (global_allocator_) HVecReplicateScalar(global_allocator_, input, type, vector_length_, kNoDexPc); vector_permanent_map_->Put(org, Insert(vector_preheader_, vector)); - if (IsInPredicatedVectorizationMode()) { - HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_, - graph_->GetIntConstant(1), - type, - vector_length_, - 0u); - vector_preheader_->InsertInstructionBefore(set_pred, vector); - vector->AsVecOperation()->SetMergingGoverningPredicate(set_pred); - } + vector_external_set_->insert(vector); } vector_map_->Put(org, vector); } @@ -1936,18 +2275,18 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org, vector_map_->Put(org, vector); } -void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) { - DCHECK(reductions_->find(phi) != reductions_->end()); - DCHECK(reductions_->Get(phi->InputAt(1)) == phi); +void HLoopOptimization::GenerateVecReductionPhi(HPhi* orig_phi) { + DCHECK(reductions_->find(orig_phi) != reductions_->end()); + DCHECK(reductions_->Get(orig_phi->InputAt(1)) == orig_phi); HInstruction* vector = nullptr; if (vector_mode_ == kSequential) { HPhi* new_phi = new (global_allocator_) HPhi( - global_allocator_, kNoRegNumber, 0, phi->GetType()); + global_allocator_, kNoRegNumber, 0, orig_phi->GetType()); vector_header_->AddPhi(new_phi); vector = new_phi; } else { // Link vector reduction back to prior unrolled update, or a first phi. - auto it = vector_permanent_map_->find(phi); + auto it = vector_permanent_map_->find(orig_phi); if (it != vector_permanent_map_->end()) { vector = it->second; } else { @@ -1957,7 +2296,7 @@ void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) { vector = new_phi; } } - vector_map_->Put(phi, vector); + vector_map_->Put(orig_phi, vector); } void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) { @@ -1992,15 +2331,7 @@ void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* r vector_length, kNoDexPc)); } - if (IsInPredicatedVectorizationMode()) { - HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_, - graph_->GetIntConstant(1), - type, - vector_length, - 0u); - vector_preheader_->InsertInstructionBefore(set_pred, new_init); - new_init->AsVecOperation()->SetMergingGoverningPredicate(set_pred); - } + vector_external_set_->insert(new_init); } else { new_init = ReduceAndExtractIfNeeded(new_init); } @@ -2026,23 +2357,15 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct // x = REDUCE( [x_1, .., x_n] ) // y = x_1 // along the exit of the defining loop. 
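ReduceAndExtractIfNeeded (the code just below) materialises the scalar result of a vector reduction at the loop exit: a horizontal HVecReduce followed by HVecExtractScalar of lane 0, both now recorded in vector_external_set_. A standalone sketch of the scalar meaning, with a plain array standing in for the vector register that holds the partial sums:

  #include <cstdint>

  // x = REDUCE([x_1, .., x_n]); y = x_1
  int32_t reduce_and_extract(const int32_t lanes[], uint32_t vl) {
    int32_t reduced = 0;
    for (uint32_t l = 0; l < vl; ++l) {
      reduced += lanes[l];         // HVecReduce (the kSum kind shown; kMin/kMax are analogous)
    }
    return reduced;                // HVecExtractScalar reads lane 0 of the reduced vector
  }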
- HInstruction* reduce = new (global_allocator_) HVecReduce( + HVecReduce* reduce = new (global_allocator_) HVecReduce( global_allocator_, instruction, type, vector_length, kind, kNoDexPc); exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction()); + vector_external_set_->insert(reduce); instruction = new (global_allocator_) HVecExtractScalar( global_allocator_, reduce, type, vector_length, 0, kNoDexPc); exit->InsertInstructionAfter(instruction, reduce); - if (IsInPredicatedVectorizationMode()) { - HVecPredSetAll* set_pred = new (global_allocator_) HVecPredSetAll(global_allocator_, - graph_->GetIntConstant(1), - type, - vector_length, - 0u); - exit->InsertInstructionBefore(set_pred, reduce); - reduce->AsVecOperation()->SetMergingGoverningPredicate(set_pred); - instruction->AsVecOperation()->SetMergingGoverningPredicate(set_pred); - } + vector_external_set_->insert(instruction); } } return instruction; @@ -2057,10 +2380,10 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct } \ break; -void HLoopOptimization::GenerateVecOp(HInstruction* org, - HInstruction* opa, - HInstruction* opb, - DataType::Type type) { +HInstruction* HLoopOptimization::GenerateVecOp(HInstruction* org, + HInstruction* opa, + HInstruction* opb, + DataType::Type type) { uint32_t dex_pc = org->GetDexPc(); HInstruction* vector = nullptr; DataType::Type org_type = org->GetType(); @@ -2130,11 +2453,23 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, GENERATE_VEC( new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc), new (global_allocator_) HAbs(org_type, opa, dex_pc)); + case HInstruction::kEqual: { + // Special case. + if (vector_mode_ == kVector) { + vector = new (global_allocator_) HVecCondition( + global_allocator_, opa, opb, type, vector_length_, dex_pc); + } else { + DCHECK(vector_mode_ == kSequential); + UNREACHABLE(); + } + } + break; default: break; } // switch CHECK(vector != nullptr) << "Unsupported SIMD operator"; vector_map_->Put(org, vector); + return vector; } #undef GENERATE_VEC @@ -2374,6 +2709,89 @@ bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, return false; } +bool HLoopOptimization::VectorizeIfCondition(LoopNode* node, + HInstruction* hif, + bool generate_code, + uint64_t restrictions) { + DCHECK(hif->IsIf()); + HInstruction* if_input = hif->InputAt(0); + + if (!if_input->HasOnlyOneNonEnvironmentUse()) { + // Avoid the complications of the condition used as materialized boolean. + return false; + } + + if (!if_input->IsEqual()) { + // TODO: Support other condition types. + return false; + } + + HCondition* cond = if_input->AsCondition(); + HInstruction* opa = cond->InputAt(0); + HInstruction* opb = cond->InputAt(1); + DataType::Type type = GetNarrowerType(opa, opb); + + if (!DataType::IsIntegralType(type)) { + return false; + } + + bool is_unsigned = false; + HInstruction* opa_promoted = opa; + HInstruction* opb_promoted = opb; + bool is_int_case = DataType::Type::kInt32 == opa->GetType() && + DataType::Type::kInt32 == opb->GetType(); + + // Condition arguments should be either both int32 or consistently extended signed/unsigned + // narrower operands. + if (!is_int_case && + !IsNarrowerOperands(opa, opb, type, &opa_promoted, &opb_promoted, &is_unsigned)) { + return false; + } + type = HVecOperation::ToProperType(type, is_unsigned); + + // For narrow types, explicit type conversion may have been + // optimized way, so set the no hi bits restriction here. 
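VectorizeIfCondition (continued in the following hunks, together with InitPredicateInfoMap's diamond handling) is the CF->DF conversion in action: the comparison becomes an HVecCondition producing the true-path predicate, HVecPredNot gives the false-path predicate, and the diamond's blocks execute under those predicates instead of branching. Below is a hedged per-lane model of a loop body containing a single if-diamond; it illustrates the semantics only, not the HIR that is actually built.

  #include <cstdint>

  // for (i) { if (a[i] == b[i]) { then-body } else { else-body } }  after CF->DF conversion:
  // both bodies are executed for every lane, each masked by its governing predicate.
  void if_diamond_predicated(const int32_t* a, const int32_t* b, int32_t* out,
                             uint32_t i, uint32_t vl) {
    for (uint32_t l = 0; l < vl; ++l) {
      bool p_true  = (a[i + l] == b[i + l]);  // HVecCondition (true-path predicate)
      bool p_false = !p_true;                 // HVecPredNot   (false-path predicate)
      if (p_true)  out[i + l] = 1;            // diamond_true block, governed by p_true
      if (p_false) out[i + l] = 0;            // diamond_false block, governed by p_false
    }
  }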
+ if (DataType::Size(type) <= 2) { + restrictions |= kNoHiBits; + } + + if (!TrySetVectorType(type, &restrictions) || + HasVectorRestrictions(restrictions, kNoIfCond)) { + return false; + } + + if (generate_code && vector_mode_ != kVector) { // de-idiom + opa_promoted = opa; + opb_promoted = opb; + } + + if (VectorizeUse(node, opa_promoted, generate_code, type, restrictions) && + VectorizeUse(node, opb_promoted, generate_code, type, restrictions)) { + if (generate_code) { + HInstruction* vec_cond = GenerateVecOp(cond, + vector_map_->Get(opa_promoted), + vector_map_->Get(opb_promoted), + type); + + if (vector_mode_ == kVector) { + HInstruction* vec_pred_not = new (global_allocator_) HVecPredNot( + global_allocator_, vec_cond, type, vector_length_, hif->GetDexPc()); + + vector_map_->Put(hif, vec_pred_not); + BlockPredicateInfo* pred_info = predicate_info_map_->Get(hif->GetBlock()); + pred_info->SetControlFlowInfo(vec_cond->AsVecPredSetOperation(), + vec_pred_not->AsVecPredSetOperation()); + } else { + DCHECK(vector_mode_ == kSequential); + UNREACHABLE(); + } + } + return true; + } + + return false; +} + // // Vectorization heuristics. // @@ -2423,6 +2841,8 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) { // TODO: trip count is really unsigned entity, provided the guarding test // is satisfied; deal with this more carefully later uint32_t max_peel = MaxNumberPeeled(); + // Peeling is not supported in predicated mode. + DCHECK_IMPLIES(IsInPredicatedVectorizationMode(), max_peel == 0u); if (vector_length_ == 0) { return false; // nothing found } else if (trip_count < 0) { @@ -2686,4 +3106,67 @@ bool HLoopOptimization::CanRemoveCycle() { return true; } +void HLoopOptimization::PreparePredicateInfoMap(LoopNode* node) { + HLoopInformation* loop_info = node->loop_info; + + DCHECK(IsPredicatedLoopControlFlowSupported(loop_info)); + + for (HBlocksInLoopIterator block_it(*loop_info); + !block_it.Done(); + block_it.Advance()) { + HBasicBlock* cur_block = block_it.Current(); + BlockPredicateInfo* pred_info = new (loop_allocator_) BlockPredicateInfo(); + + predicate_info_map_->Put(cur_block, pred_info); + } +} + +void HLoopOptimization::InitPredicateInfoMap(LoopNode* node, + HVecPredSetOperation* loop_main_pred) { + HLoopInformation* loop_info = node->loop_info; + HBasicBlock* header = loop_info->GetHeader(); + BlockPredicateInfo* header_info = predicate_info_map_->Get(header); + // Loop header is a special case; it doesn't have a false predicate because we + // would just exit the loop then. + header_info->SetControlFlowInfo(loop_main_pred, loop_main_pred); + + size_t blocks_in_loop = header->GetLoopInformation()->GetBlocks().NumSetBits(); + if (blocks_in_loop == 2) { + for (HBasicBlock* successor : header->GetSuccessors()) { + if (loop_info->Contains(*successor)) { + // This is loop second block - body. + BlockPredicateInfo* body_info = predicate_info_map_->Get(successor); + body_info->SetControlPredicate(loop_main_pred); + return; + } + } + UNREACHABLE(); + } + + // TODO: support predicated vectorization of CF loop of more complex structure. + DCHECK(HasLoopDiamondStructure(loop_info)); + HBasicBlock* header_succ_0 = header->GetSuccessors()[0]; + HBasicBlock* header_succ_1 = header->GetSuccessors()[1]; + HBasicBlock* diamond_top = loop_info->Contains(*header_succ_0) ? 
+ header_succ_0 : + header_succ_1; + + HIf* diamond_hif = diamond_top->GetLastInstruction()->AsIf(); + HBasicBlock* diamond_true = diamond_hif->IfTrueSuccessor(); + HBasicBlock* diamond_false = diamond_hif->IfFalseSuccessor(); + HBasicBlock* back_edge = diamond_true->GetSingleSuccessor(); + + BlockPredicateInfo* diamond_top_info = predicate_info_map_->Get(diamond_top); + BlockPredicateInfo* diamond_true_info = predicate_info_map_->Get(diamond_true); + BlockPredicateInfo* diamond_false_info = predicate_info_map_->Get(diamond_false); + BlockPredicateInfo* back_edge_info = predicate_info_map_->Get(back_edge); + + diamond_top_info->SetControlPredicate(header_info->GetTruePredicate()); + + diamond_true_info->SetControlPredicate(diamond_top_info->GetTruePredicate()); + diamond_false_info->SetControlPredicate(diamond_top_info->GetFalsePredicate()); + + back_edge_info->SetControlPredicate(header_info->GetTruePredicate()); +} + } // namespace art diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 6dd778ba74..86a9f0fcb8 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -101,6 +101,7 @@ class HLoopOptimization : public HOptimization { kNoSAD = 1 << 11, // no sum of absolute differences (SAD) kNoWideSAD = 1 << 12, // no sum of absolute differences (SAD) with operand widening kNoDotProd = 1 << 13, // no dot product + kNoIfCond = 1 << 14, // no if condition conversion }; /* @@ -136,6 +137,95 @@ class HLoopOptimization : public HOptimization { bool is_string_char_at; // compressed string read }; + // This structure describes the control flow (CF) -> data flow (DF) conversion of the loop + // with control flow (see below) for the purpose of predicated autovectorization. + // + // Lets define "loops without control-flow" (or non-CF loops) as loops with two consecutive + // blocks and without the branching structure except for the loop exit. And + // "loop with control-flow" (or CF-loops) - all other loops. + // + // In the execution of the original CF-loop on each iteration some basic block Y will be + // either executed or not executed, depending on the control flow of the loop. More + // specifically, a block will be executed if all the conditional branches of the nodes in + // the control dependency graph for that block Y are taken according to the path from the loop + // header to that basic block. + // + // This is the key idea of CF->DF conversion: a boolean value + // 'ctrl_pred == cond1 && cond2 && ...' will determine whether the basic block Y will be + // executed, where cond_K is whether the branch of the node K in the control dependency + // graph upward traversal was taken in the 'right' direction. + // + // Def.: BB Y is control dependent on BB X iff + // (1) there exists a directed path P from X to Y with any basic block Z in P (excluding X + // and Y) post-dominated by Y and + // (2) X is not post-dominated by Y. + // ... + // X + // false / \ true + // / \ + // ... + // | + // Y + // ... + // + // When doing predicated autovectorization of a CF loop, we use the CF->DF conversion approach: + // 1) do the data analysis and vector operation creation as if it was a non-CF loop. + // 2) for each HIf block create two vector predicate setting instructions - for True and False + // edges/paths. 
+ // 3) assign a governing vector predicate (see comments near HVecPredSetOperation) + // to each vector operation Alpha in the loop (including to those vector predicate setting + // instructions created in #2); do this by: + // - finding the immediate control dependent block of the instruction Alpha's block. + // - choosing the True or False predicate setting instruction (created in #2) depending + // on the path to the instruction. + // + // For more information check the papers: + // + // - Allen, John R and Kennedy, Ken and Porterfield, Carrie and Warren, Joe, + // “Conversion of Control Dependence to Data Dependence,” in Proceedings of the 10th ACM + // SIGACT-SIGPLAN Symposium on Principles of Programming Languages, 1983, pp. 177–189. + // - JEANNE FERRANTE, KARL J. OTTENSTEIN, JOE D. WARREN, + // "The Program Dependence Graph and Its Use in Optimization" + // + class BlockPredicateInfo : public ArenaObject<kArenaAllocLoopOptimization> { + public: + BlockPredicateInfo() : + control_predicate_(nullptr), + true_predicate_(nullptr), + false_predicate_(nullptr) {} + + void SetControlFlowInfo(HVecPredSetOperation* true_predicate, + HVecPredSetOperation* false_predicate) { + DCHECK(!HasControlFlowOps()); + true_predicate_ = true_predicate; + false_predicate_ = false_predicate; + } + + bool HasControlFlowOps() const { + // Note: a block must have both T/F predicates set or none of them. + DCHECK_EQ(true_predicate_ == nullptr, false_predicate_ == nullptr); + return true_predicate_ != nullptr; + } + + HVecPredSetOperation* GetControlPredicate() const { return control_predicate_; } + void SetControlPredicate(HVecPredSetOperation* control_predicate) { + control_predicate_ = control_predicate; + } + + HVecPredSetOperation* GetTruePredicate() const { return true_predicate_; } + HVecPredSetOperation* GetFalsePredicate() const { return false_predicate_; } + + private: + // Vector control predicate operation, associated with the block which will determine + // the active lanes for all vector operations, originated from this block. + HVecPredSetOperation* control_predicate_; + + // Vector predicate instruction, associated with the true sucessor of the block. + HVecPredSetOperation* true_predicate_; + // Vector predicate instruction, associated with the false sucessor of the block. + HVecPredSetOperation* false_predicate_; + }; + // // Loop setup and traversal. // @@ -203,15 +293,95 @@ class HLoopOptimization : public HOptimization { // Vectorization analysis and synthesis. // - bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count); - void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count); - void GenerateNewLoop(LoopNode* node, - HBasicBlock* block, - HBasicBlock* new_preheader, - HInstruction* lo, - HInstruction* hi, - HInstruction* step, - uint32_t unroll); + // Returns whether the data flow requirements are met for vectorization. + // + // - checks whether instructions are vectorizable for the target. + // - conducts data dependence analysis for array references. + // - additionally, collects info on peeling and aligment strategy. + bool CanVectorizeDataFlow(LoopNode* node, HBasicBlock* header, bool collect_alignment_info); + + // Does the checks (common for predicated and traditional mode) for the loop. + bool ShouldVectorizeCommon(LoopNode* node, HPhi* main_phi, int64_t trip_count); + + // Try to vectorize the loop, returns whether it was successful. 
+ // + // There are two versions/algorithms: + // - Predicated: all the vector operations have governing predicates which control + // which individual vector lanes will be active (see HVecPredSetOperation for more details). + // Example: vectorization using AArch64 SVE. + // - Traditional: a regular mode in which all vector operations lanes are unconditionally + // active. + // Example: vectoriation using AArch64 NEON. + bool TryVectorizePredicated(LoopNode* node, + HBasicBlock* body, + HBasicBlock* exit, + HPhi* main_phi, + int64_t trip_count); + + bool TryVectorizedTraditional(LoopNode* node, + HBasicBlock* body, + HBasicBlock* exit, + HPhi* main_phi, + int64_t trip_count); + + // Vectorizes the loop for which all checks have been already done. + void VectorizePredicated(LoopNode* node, + HBasicBlock* block, + HBasicBlock* exit); + void VectorizeTraditional(LoopNode* node, + HBasicBlock* block, + HBasicBlock* exit, + int64_t trip_count); + + // Performs final steps for whole vectorization process: links reduction, removes the original + // scalar loop, updates loop info. + void FinalizeVectorization(LoopNode* node); + + // Helpers that do the vector instruction synthesis for the previously created loop; create + // and fill the loop body with instructions. + // + // A version to generate a vector loop in predicated mode. + void GenerateNewLoopPredicated(LoopNode* node, + HBasicBlock* new_preheader, + HInstruction* lo, + HInstruction* hi, + HInstruction* step); + + // A version to generate a vector loop in traditional mode or to generate + // a scalar loop for both modes. + void GenerateNewLoopScalarOrTraditional(LoopNode* node, + HBasicBlock* new_preheader, + HInstruction* lo, + HInstruction* hi, + HInstruction* step, + uint32_t unroll); + + // + // Helpers for GenerateNewLoop*. + // + + // Updates vectorization bookkeeping date for the new loop, creates and returns + // its main induction Phi. + HPhi* InitializeForNewLoop(HBasicBlock* new_preheader, HInstruction* lo); + + // Finalizes reduction and induction phis' inputs for the newly created loop. + void FinalizePhisForNewLoop(HPhi* phi, HInstruction* lo); + + // Creates empty predicate info object for each basic block and puts it into the map. + void PreparePredicateInfoMap(LoopNode* node); + + // Set up block true/false predicates using info, collected through data flow and control + // dependency analysis. + void InitPredicateInfoMap(LoopNode* node, HVecPredSetOperation* loop_main_pred); + + // Performs instruction synthesis for the loop body. + void GenerateNewLoopBodyOnce(LoopNode* node, + DataType::Type induc_type, + HInstruction* step); + + // Returns whether the vector loop needs runtime disambiguation test for array refs. + bool NeedsArrayRefsDisambiguationTest() const { return vector_runtime_test_a_ != nullptr; } + bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code); bool VectorizeUse(LoopNode* node, HInstruction* instruction, @@ -239,10 +409,10 @@ class HLoopOptimization : public HOptimization { void GenerateVecReductionPhi(HPhi* phi); void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction); HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction); - void GenerateVecOp(HInstruction* org, - HInstruction* opa, - HInstruction* opb, - DataType::Type type); + HInstruction* GenerateVecOp(HInstruction* org, + HInstruction* opa, + HInstruction* opb, + DataType::Type type); // Vectorization idioms. 
bool VectorizeSaturationIdiom(LoopNode* node, @@ -265,6 +435,10 @@ class HLoopOptimization : public HOptimization { bool generate_code, DataType::Type type, uint64_t restrictions); + bool VectorizeIfCondition(LoopNode* node, + HInstruction* instruction, + bool generate_code, + uint64_t restrictions); // Vectorization heuristics. Alignment ComputeAlignment(HInstruction* offset, @@ -369,6 +543,16 @@ class HLoopOptimization : public HOptimization { // Contents reside in phase-local heap memory. ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_; + // Tracks vector operations that are inserted outside of the loop (preheader, exit) + // as part of vectorization (e.g. replicate scalar for loop invariants and reduce ops + // for loop reductions). + ScopedArenaSet<HInstruction*>* vector_external_set_; + + // A mapping between a basic block of the original loop and its associated PredicateInfo. + // + // Only used in predicated loop vectorization mode. + ScopedArenaSafeMap<HBasicBlock*, BlockPredicateInfo*>* predicate_info_map_; + // Temporary vectorization bookkeeping. VectorMode vector_mode_; // synthesis mode HBasicBlock* vector_preheader_; // preheader of the new loop diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index 7f694fb655..49e3c0418f 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -30,6 +30,7 @@ namespace art HIDDEN { class LoopOptimizationTest : public OptimizingUnitTest { protected: void SetUp() override { + TEST_SETUP_DISABLED_FOR_RISCV64(); OptimizingUnitTest::SetUp(); graph_ = CreateGraph(); @@ -44,6 +45,7 @@ class LoopOptimizationTest : public OptimizingUnitTest { } void TearDown() override { + TEST_TEARDOWN_DISABLED_FOR_RISCV64(); codegen_.reset(); compiler_options_.reset(); graph_ = nullptr; @@ -134,17 +136,20 @@ class LoopOptimizationTest : public OptimizingUnitTest { // TEST_F(LoopOptimizationTest, NoLoops) { + TEST_DISABLED_FOR_RISCV64(); PerformAnalysis(); EXPECT_EQ("", LoopStructure()); } TEST_F(LoopOptimizationTest, SingleLoop) { + TEST_DISABLED_FOR_RISCV64(); AddLoop(entry_block_, return_block_); PerformAnalysis(); EXPECT_EQ("[]", LoopStructure()); } TEST_F(LoopOptimizationTest, LoopNest10) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* b = entry_block_; HBasicBlock* s = return_block_; for (int i = 0; i < 10; i++) { @@ -156,6 +161,7 @@ TEST_F(LoopOptimizationTest, LoopNest10) { } TEST_F(LoopOptimizationTest, LoopSequence10) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* b = entry_block_; HBasicBlock* s = return_block_; for (int i = 0; i < 10; i++) { @@ -167,6 +173,7 @@ TEST_F(LoopOptimizationTest, LoopSequence10) { } TEST_F(LoopOptimizationTest, LoopSequenceOfNests) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* b = entry_block_; HBasicBlock* s = return_block_; for (int i = 0; i < 10; i++) { @@ -194,6 +201,7 @@ TEST_F(LoopOptimizationTest, LoopSequenceOfNests) { } TEST_F(LoopOptimizationTest, LoopNestWithSequence) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* b = entry_block_; HBasicBlock* s = return_block_; for (int i = 0; i < 10; i++) { @@ -215,6 +223,7 @@ TEST_F(LoopOptimizationTest, LoopNestWithSequence) { // // This is a test for nodes.cc functionality - HGraph::SimplifyLoop. TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) { + TEST_DISABLED_FOR_RISCV64(); // Can't use AddLoop as we want special order for blocks predecessors. 
HBasicBlock* header = new (GetAllocator()) HBasicBlock(graph_); HBasicBlock* body = new (GetAllocator()) HBasicBlock(graph_); @@ -260,6 +269,7 @@ TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) { // // This is a test for nodes.cc functionality - HGraph::SimplifyLoop. TEST_F(LoopOptimizationTest, SimplifyLoopSinglePreheader) { + TEST_DISABLED_FOR_RISCV64(); HBasicBlock* header = AddLoop(entry_block_, return_block_); header->InsertInstructionBefore( diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 3790058879..5795ea7ca9 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -36,6 +36,7 @@ #include "code_generator.h" #include "common_dominator.h" #include "intrinsics.h" +#include "intrinsics_list.h" #include "mirror/class-inl.h" #include "scoped_thread_state_change-inl.h" #include "ssa_builder.h" @@ -1488,12 +1489,12 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1, const HInstruction* instruction2) const { DCHECK_EQ(instruction1->GetBlock(), instruction2->GetBlock()); for (HInstructionIterator it(*this); !it.Done(); it.Advance()) { - if (it.Current() == instruction1) { - return true; - } if (it.Current() == instruction2) { return false; } + if (it.Current() == instruction1) { + return true; + } } LOG(FATAL) << "Did not find an order between two instructions of the same block."; UNREACHABLE(); @@ -1815,10 +1816,12 @@ void HGraphVisitor::VisitBasicBlock(HBasicBlock* block) { } } -HConstant* HTypeConversion::TryStaticEvaluation() const { - HGraph* graph = GetBlock()->GetGraph(); - if (GetInput()->IsIntConstant()) { - int32_t value = GetInput()->AsIntConstant()->GetValue(); +HConstant* HTypeConversion::TryStaticEvaluation() const { return TryStaticEvaluation(GetInput()); } + +HConstant* HTypeConversion::TryStaticEvaluation(HInstruction* input) const { + HGraph* graph = input->GetBlock()->GetGraph(); + if (input->IsIntConstant()) { + int32_t value = input->AsIntConstant()->GetValue(); switch (GetResultType()) { case DataType::Type::kInt8: return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc()); @@ -1837,8 +1840,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { default: return nullptr; } - } else if (GetInput()->IsLongConstant()) { - int64_t value = GetInput()->AsLongConstant()->GetValue(); + } else if (input->IsLongConstant()) { + int64_t value = input->AsLongConstant()->GetValue(); switch (GetResultType()) { case DataType::Type::kInt8: return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc()); @@ -1857,8 +1860,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { default: return nullptr; } - } else if (GetInput()->IsFloatConstant()) { - float value = GetInput()->AsFloatConstant()->GetValue(); + } else if (input->IsFloatConstant()) { + float value = input->AsFloatConstant()->GetValue(); switch (GetResultType()) { case DataType::Type::kInt32: if (std::isnan(value)) @@ -1881,8 +1884,8 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { default: return nullptr; } - } else if (GetInput()->IsDoubleConstant()) { - double value = GetInput()->AsDoubleConstant()->GetValue(); + } else if (input->IsDoubleConstant()) { + double value = input->AsDoubleConstant()->GetValue(); switch (GetResultType()) { case DataType::Type::kInt32: if (std::isnan(value)) @@ -1909,41 +1912,47 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { return nullptr; } -HConstant* HUnaryOperation::TryStaticEvaluation() const { - if (GetInput()->IsIntConstant()) { - return 
Evaluate(GetInput()->AsIntConstant()); - } else if (GetInput()->IsLongConstant()) { - return Evaluate(GetInput()->AsLongConstant()); +HConstant* HUnaryOperation::TryStaticEvaluation() const { return TryStaticEvaluation(GetInput()); } + +HConstant* HUnaryOperation::TryStaticEvaluation(HInstruction* input) const { + if (input->IsIntConstant()) { + return Evaluate(input->AsIntConstant()); + } else if (input->IsLongConstant()) { + return Evaluate(input->AsLongConstant()); } else if (kEnableFloatingPointStaticEvaluation) { - if (GetInput()->IsFloatConstant()) { - return Evaluate(GetInput()->AsFloatConstant()); - } else if (GetInput()->IsDoubleConstant()) { - return Evaluate(GetInput()->AsDoubleConstant()); + if (input->IsFloatConstant()) { + return Evaluate(input->AsFloatConstant()); + } else if (input->IsDoubleConstant()) { + return Evaluate(input->AsDoubleConstant()); } } return nullptr; } HConstant* HBinaryOperation::TryStaticEvaluation() const { - if (GetLeft()->IsIntConstant() && GetRight()->IsIntConstant()) { - return Evaluate(GetLeft()->AsIntConstant(), GetRight()->AsIntConstant()); - } else if (GetLeft()->IsLongConstant()) { - if (GetRight()->IsIntConstant()) { + return TryStaticEvaluation(GetLeft(), GetRight()); +} + +HConstant* HBinaryOperation::TryStaticEvaluation(HInstruction* left, HInstruction* right) const { + if (left->IsIntConstant() && right->IsIntConstant()) { + return Evaluate(left->AsIntConstant(), right->AsIntConstant()); + } else if (left->IsLongConstant()) { + if (right->IsIntConstant()) { // The binop(long, int) case is only valid for shifts and rotations. DCHECK(IsShl() || IsShr() || IsUShr() || IsRor()) << DebugName(); - return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsIntConstant()); - } else if (GetRight()->IsLongConstant()) { - return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsLongConstant()); + return Evaluate(left->AsLongConstant(), right->AsIntConstant()); + } else if (right->IsLongConstant()) { + return Evaluate(left->AsLongConstant(), right->AsLongConstant()); } - } else if (GetLeft()->IsNullConstant() && GetRight()->IsNullConstant()) { + } else if (left->IsNullConstant() && right->IsNullConstant()) { // The binop(null, null) case is only valid for equal and not-equal conditions. 
DCHECK(IsEqual() || IsNotEqual()) << DebugName(); - return Evaluate(GetLeft()->AsNullConstant(), GetRight()->AsNullConstant()); + return Evaluate(left->AsNullConstant(), right->AsNullConstant()); } else if (kEnableFloatingPointStaticEvaluation) { - if (GetLeft()->IsFloatConstant() && GetRight()->IsFloatConstant()) { - return Evaluate(GetLeft()->AsFloatConstant(), GetRight()->AsFloatConstant()); - } else if (GetLeft()->IsDoubleConstant() && GetRight()->IsDoubleConstant()) { - return Evaluate(GetLeft()->AsDoubleConstant(), GetRight()->AsDoubleConstant()); + if (left->IsFloatConstant() && right->IsFloatConstant()) { + return Evaluate(left->AsFloatConstant(), right->AsFloatConstant()); + } else if (left->IsDoubleConstant() && right->IsDoubleConstant()) { + return Evaluate(left->AsDoubleConstant(), right->AsDoubleConstant()); } } return nullptr; @@ -2797,8 +2806,11 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { if (HasMonitorOperations()) { outer_graph->SetHasMonitorOperations(true); } - if (HasSIMD()) { - outer_graph->SetHasSIMD(true); + if (HasTraditionalSIMD()) { + outer_graph->SetHasTraditionalSIMD(true); + } + if (HasPredicatedSIMD()) { + outer_graph->SetHasPredicatedSIMD(true); } if (HasAlwaysThrowingInvokes()) { outer_graph->SetHasAlwaysThrowingInvokes(true); @@ -3026,9 +3038,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { replacement = outer_graph->GetDoubleConstant( current->AsDoubleConstant()->GetValue(), current->GetDexPc()); } else if (current->IsParameterValue()) { - if (kIsDebugBuild - && invoke->IsInvokeStaticOrDirect() - && invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) { + if (kIsDebugBuild && + invoke->IsInvokeStaticOrDirect() && + invoke->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck()) { // Ensure we do not use the last input of `invoke`, as it // contains a clinit check which is not an actual argument. size_t last_input_index = invoke->InputCount() - 1; @@ -3125,6 +3137,8 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { new_pre_header, old_pre_header, /* replace_if_back_edge= */ false); } +// Creates a new two-basic-block loop and inserts it between original loop header and +// original loop exit; also adjusts dominators, post order and new LoopInformation. HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header, HBasicBlock* body, HBasicBlock* exit) { @@ -3518,9 +3532,7 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) { static_assert( \ static_cast<uint32_t>(Intrinsics::k ## Name) <= (kAccIntrinsicBits >> CTZ(kAccIntrinsicBits)), \ "Instrinsics enumeration space overflow."); -#include "intrinsics_list.h" - INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES) #undef CHECK_INTRINSICS_ENUM_VALUES // Function that returns whether an intrinsic needs an environment or not. @@ -3531,9 +3543,7 @@ static inline IntrinsicNeedsEnvironment NeedsEnvironmentIntrinsic(Intrinsics i) #define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) 
\ case Intrinsics::k ## Name: \ return NeedsEnv; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS } return kNeedsEnvironment; @@ -3547,9 +3557,7 @@ static inline IntrinsicSideEffects GetSideEffectsIntrinsic(Intrinsics i) { #define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ return SideEffects; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS } return kAllSideEffects; @@ -3563,9 +3571,7 @@ static inline IntrinsicExceptions GetExceptionsIntrinsic(Intrinsics i) { #define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnv, SideEffects, Exceptions, ...) \ case Intrinsics::k ## Name: \ return Exceptions; -#include "intrinsics_list.h" - INTRINSICS_LIST(OPTIMIZING_INTRINSICS) -#undef INTRINSICS_LIST + ART_INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef OPTIMIZING_INTRINSICS } return kCanThrow; diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 28112d176a..9cf52183b8 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -403,7 +403,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { has_bounds_checks_(false), has_try_catch_(false), has_monitor_operations_(false), - has_simd_(false), + has_traditional_simd_(false), + has_predicated_simd_(false), has_loops_(false), has_irreducible_loops_(false), has_direct_critical_native_call_(false), @@ -708,8 +709,13 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool HasMonitorOperations() const { return has_monitor_operations_; } void SetHasMonitorOperations(bool value) { has_monitor_operations_ = value; } - bool HasSIMD() const { return has_simd_; } - void SetHasSIMD(bool value) { has_simd_ = value; } + bool HasTraditionalSIMD() { return has_traditional_simd_; } + void SetHasTraditionalSIMD(bool value) { has_traditional_simd_ = value; } + + bool HasPredicatedSIMD() { return has_predicated_simd_; } + void SetHasPredicatedSIMD(bool value) { has_predicated_simd_ = value; } + + bool HasSIMD() const { return has_traditional_simd_ || has_predicated_simd_; } bool HasLoops() const { return has_loops_; } void SetHasLoops(bool value) { has_loops_ = value; } @@ -822,10 +828,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // DexRegisterMap to be present to allow deadlock analysis for non-debuggable code. bool has_monitor_operations_; - // Flag whether SIMD instructions appear in the graph. If true, the - // code generators may have to be more careful spilling the wider + // Flags whether SIMD (traditional or predicated) instructions appear in the graph. + // If either is true, the code generators may have to be more careful spilling the wider // contents of SIMD registers. - bool has_simd_; + bool has_traditional_simd_; + bool has_predicated_simd_; // Flag whether there are any loops in the graph. We can skip loop // optimization if it's false. 
@@ -1636,7 +1643,9 @@ class HLoopInformationOutwardIterator : public ValueObject { M(VecStore, VecMemoryOperation) \ M(VecPredSetAll, VecPredSetOperation) \ M(VecPredWhile, VecPredSetOperation) \ - M(VecPredCondition, VecOperation) \ + M(VecPredToBoolean, VecOperation) \ + M(VecCondition, VecPredSetOperation) \ + M(VecPredNot, VecPredSetOperation) \ #define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \ FOR_EACH_CONCRETE_INSTRUCTION_SCALAR_COMMON(M) \ @@ -1659,6 +1668,8 @@ class HLoopInformationOutwardIterator : public ValueObject { #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) +#define FOR_EACH_CONCRETE_INSTRUCTION_RISCV64(M) + #ifndef ART_ENABLE_CODEGEN_x86 #define FOR_EACH_CONCRETE_INSTRUCTION_X86(M) #else @@ -1715,7 +1726,7 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) const char* DebugName() const override { return #type; } \ HInstruction* Clone(ArenaAllocator* arena) const override { \ DCHECK(IsClonable()); \ - return new (arena) H##type(*this->As##type()); \ + return new (arena) H##type(*this); \ } \ void Accept(HGraphVisitor* visitor) override @@ -2062,12 +2073,12 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { ArtMethod* method, uint32_t dex_pc, HInstruction* holder) - : vregs_(number_of_vregs, allocator->Adapter(kArenaAllocEnvironmentVRegs)), - locations_(allocator->Adapter(kArenaAllocEnvironmentLocations)), - parent_(nullptr), - method_(method), - dex_pc_(dex_pc), - holder_(holder) { + : vregs_(number_of_vregs, allocator->Adapter(kArenaAllocEnvironmentVRegs)), + locations_(allocator->Adapter(kArenaAllocEnvironmentLocations)), + parent_(nullptr), + method_(method), + dex_pc_(dex_pc), + holder_(holder) { } ALWAYS_INLINE HEnvironment(ArenaAllocator* allocator, @@ -2183,9 +2194,14 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { std::ostream& operator<<(std::ostream& os, const HInstruction& rhs); // Iterates over the Environments -class HEnvironmentIterator : public ValueObject, - public std::iterator<std::forward_iterator_tag, HEnvironment*> { +class HEnvironmentIterator : public ValueObject { public: + using iterator_category = std::forward_iterator_tag; + using value_type = HEnvironment*; + using difference_type = ptrdiff_t; + using pointer = void; + using reference = void; + explicit HEnvironmentIterator(HEnvironment* cur) : cur_(cur) {} HEnvironment* operator*() const { @@ -2355,9 +2371,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { return true; } - virtual bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const { - return false; - } + virtual bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const { return false; } // If this instruction will do an implicit null check, return the `HNullCheck` associated // with it. Otherwise return null. @@ -2553,7 +2567,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { #define INSTRUCTION_TYPE_CAST(type, super) \ const H##type* As##type() const; \ - H##type* As##type(); + H##type* As##type(); \ + const H##type* As##type##OrNull() const; \ + H##type* As##type##OrNull(); FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST) #undef INSTRUCTION_TYPE_CAST @@ -2568,7 +2584,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // // Note: HEnvironment and some other fields are not copied and are set to default values, see // 'explicit HInstruction(const HInstruction& other)' for details. 
- virtual HInstruction* Clone(ArenaAllocator* arena ATTRIBUTE_UNUSED) const { + virtual HInstruction* Clone([[maybe_unused]] ArenaAllocator* arena) const { LOG(FATAL) << "Cloning is not implemented for the instruction " << DebugName() << " " << GetId(); UNREACHABLE(); @@ -2596,7 +2612,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // Returns whether any data encoded in the two instructions is equal. // This method does not look at the inputs. Both instructions must be // of the same type, otherwise the method has undefined behavior. - virtual bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const { + virtual bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const { return false; } @@ -2729,7 +2745,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { private: using InstructionKindField = - BitField<InstructionKind, kFieldInstructionKind, kFieldInstructionKindSize>; + BitField<InstructionKind, kFieldInstructionKind, kFieldInstructionKindSize>; void FixUpUserRecordsAfterUseInsertion(HUseList<HInstruction*>::iterator fixup_end) { auto before_use_node = uses_.before_begin(); @@ -2904,9 +2920,14 @@ class HBackwardInstructionIterator : public ValueObject { }; template <typename InnerIter> -struct HSTLInstructionIterator : public ValueObject, - public std::iterator<std::forward_iterator_tag, HInstruction*> { +struct HSTLInstructionIterator : public ValueObject { public: + using iterator_category = std::forward_iterator_tag; + using value_type = HInstruction*; + using difference_type = ptrdiff_t; + using pointer = void; + using reference = void; + static_assert(std::is_same_v<InnerIter, HBackwardInstructionIterator> || std::is_same_v<InnerIter, HInstructionIterator> || std::is_same_v<InnerIter, HInstructionIteratorHandleChanges>, @@ -3164,7 +3185,7 @@ class HPhi final : public HVariableInputSizeInstruction { bool IsVRegEquivalentOf(const HInstruction* other) const { return other != nullptr && other->IsPhi() - && other->AsPhi()->GetBlock() == GetBlock() + && other->GetBlock() == GetBlock() && other->AsPhi()->GetRegNumber() == GetRegNumber(); } @@ -3270,7 +3291,7 @@ class HConstant : public HExpression<0> { class HNullConstant final : public HConstant { public: - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -3639,7 +3660,8 @@ class HDeoptimize final : public HVariableInputSizeInstruction { bool CanBeMoved() const override { return GetPackedFlag<kFieldCanBeMoved>(); } bool InstructionDataEquals(const HInstruction* other) const override { - return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind()); + return (other->CanBeMoved() == CanBeMoved()) && + (other->AsDeoptimize()->GetDeoptimizationKind() == GetDeoptimizationKind()); } bool NeedsEnvironment() const override { return true; } @@ -3827,7 +3849,7 @@ class HUnaryOperation : public HExpression<1> { DataType::Type GetResultType() const { return GetType(); } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -3836,6 +3858,9 @@ class HUnaryOperation : public HExpression<1> { // be evaluated as a constant, return null. 
HConstant* TryStaticEvaluation() const; + // Same but for `input` instead of GetInput(). + HConstant* TryStaticEvaluation(HInstruction* input) const; + // Apply this operation to `x`. virtual HConstant* Evaluate(HIntConstant* x) const = 0; virtual HConstant* Evaluate(HLongConstant* x) const = 0; @@ -3903,7 +3928,7 @@ class HBinaryOperation : public HExpression<2> { } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -3912,16 +3937,19 @@ class HBinaryOperation : public HExpression<2> { // be evaluated as a constant, return null. HConstant* TryStaticEvaluation() const; + // Same but for `left` and `right` instead of GetLeft() and GetRight(). + HConstant* TryStaticEvaluation(HInstruction* left, HInstruction* right) const; + // Apply this operation to `x` and `y`. - virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, - HNullConstant* y ATTRIBUTE_UNUSED) const { + virtual HConstant* Evaluate([[maybe_unused]] HNullConstant* x, + [[maybe_unused]] HNullConstant* y) const { LOG(FATAL) << DebugName() << " is not defined for the (null, null) case."; UNREACHABLE(); } virtual HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const = 0; virtual HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const = 0; - virtual HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED, - HIntConstant* y ATTRIBUTE_UNUSED) const { + virtual HConstant* Evaluate([[maybe_unused]] HLongConstant* x, + [[maybe_unused]] HIntConstant* y) const { LOG(FATAL) << DebugName() << " is not defined for the (long, int) case."; UNREACHABLE(); } @@ -4049,8 +4077,8 @@ class HEqual final : public HCondition { bool IsCommutative() const override { return true; } - HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, - HNullConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HNullConstant* x, + [[maybe_unused]] HNullConstant* y) const override { return MakeConstantCondition(true, GetDexPc()); } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { @@ -4096,8 +4124,8 @@ class HNotEqual final : public HCondition { bool IsCommutative() const override { return true; } - HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED, - HNullConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HNullConstant* x, + [[maybe_unused]] HNullConstant* y) const override { return MakeConstantCondition(false, GetDexPc()); } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override { @@ -4303,13 +4331,13 @@ class HBelow final : public HCondition { HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -4345,13 +4373,13 @@ class HBelowOrEqual final : 
public HCondition { HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -4387,13 +4415,13 @@ class HAbove final : public HCondition { HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -4429,13 +4457,13 @@ class HAboveOrEqual final : public HCondition { HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -4522,7 +4550,7 @@ class HCompare final : public HBinaryOperation { return GetBias() == ComparisonBias::kGtBias; } - static SideEffects SideEffectsForArchRuntimeCalls(DataType::Type type ATTRIBUTE_UNUSED) { + static SideEffects SideEffectsForArchRuntimeCalls([[maybe_unused]] DataType::Type type) { // Comparisons do not require a runtime call in any back end. return SideEffects::None(); } @@ -4859,8 +4887,7 @@ class HInvokePolymorphic final : public HInvoke { // to pass intrinsic information to the HInvokePolymorphic node. 
ArtMethod* resolved_method, MethodReference resolved_method_reference, - dex::ProtoIndex proto_idx, - bool enable_intrinsic_opt) + dex::ProtoIndex proto_idx) : HInvoke(kInvokePolymorphic, allocator, number_of_arguments, @@ -4871,9 +4898,8 @@ class HInvokePolymorphic final : public HInvoke { resolved_method, resolved_method_reference, kPolymorphic, - enable_intrinsic_opt), - proto_idx_(proto_idx) { - } + /* enable_intrinsic_opt= */ true), + proto_idx_(proto_idx) {} bool IsClonable() const override { return true; } @@ -5015,7 +5041,7 @@ class HInvokeStaticOrDirect final : public HInvoke { return input_records; } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { + bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override { // We do not access the method via object reference, so we cannot do an implicit null check. // TODO: for intrinsics we can generate implicit null checks. return false; @@ -5599,10 +5625,14 @@ class HMin final : public HBinaryOperation { ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } // TODO: Evaluation for floating-point values. - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { + return nullptr; + } + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { + return nullptr; + } DECLARE_INSTRUCTION(Min); @@ -5634,10 +5664,14 @@ class HMax final : public HBinaryOperation { ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc()); } // TODO: Evaluation for floating-point values. 
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; } + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { + return nullptr; + } + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { + return nullptr; + } DECLARE_INSTRUCTION(Max); @@ -5699,7 +5733,7 @@ class HDivZeroCheck final : public HExpression<1> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -5736,18 +5770,18 @@ class HShl final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* value, + [[maybe_unused]] HLongConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* value, + [[maybe_unused]] HFloatConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value, + [[maybe_unused]] HDoubleConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5782,18 +5816,18 @@ class HShr final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* value, + [[maybe_unused]] HLongConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* value, + [[maybe_unused]] HFloatConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value, + [[maybe_unused]] HDoubleConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5830,18 +5864,18 @@ class HUShr final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc()); } - 
HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* value, + [[maybe_unused]] HLongConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* value, + [[maybe_unused]] HFloatConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value, + [[maybe_unused]] HDoubleConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5873,13 +5907,13 @@ class HAnd final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5911,13 +5945,13 @@ class HOr final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5949,13 +5983,13 @@ class HXor final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -5993,18 +6027,18 @@ class HRor final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(value->GetValue(), distance->GetValue(), 
kMaxLongShiftDistance), GetDexPc()); } - HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED, - HLongConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* value, + [[maybe_unused]] HLongConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for the (long, long) case."; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED, - HFloatConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* value, + [[maybe_unused]] HFloatConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED, - HDoubleConstant* distance ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* value, + [[maybe_unused]] HDoubleConstant* distance) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -6067,7 +6101,7 @@ class HNot final : public HUnaryOperation { } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -6079,11 +6113,11 @@ class HNot final : public HUnaryOperation { HConstant* Evaluate(HLongConstant* x) const override { return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -6101,7 +6135,7 @@ class HBooleanNot final : public HUnaryOperation { } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -6113,15 +6147,15 @@ class HBooleanNot final : public HUnaryOperation { HConstant* Evaluate(HIntConstant* x) const override { return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HLongConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for long values"; UNREACHABLE(); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -6148,7 +6182,7 @@ class HTypeConversion final : public HExpression<1> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool 
InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } // Return whether the conversion is implicit. This includes conversion to the same type. @@ -6160,6 +6194,9 @@ class HTypeConversion final : public HExpression<1> { // containing the result. If the input cannot be converted, return nullptr. HConstant* TryStaticEvaluation() const; + // Same but for `input` instead of GetInput(). + HConstant* TryStaticEvaluation(HInstruction* input) const; + DECLARE_INSTRUCTION(TypeConversion); protected: @@ -6180,7 +6217,7 @@ class HNullCheck final : public HExpression<1> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -6511,12 +6548,12 @@ class HArrayGet final : public HExpression<2> { HInstruction* index, DataType::Type type, uint32_t dex_pc) - : HArrayGet(array, - index, - type, - SideEffects::ArrayReadOfType(type), - dex_pc, - /* is_string_char_at= */ false) { + : HArrayGet(array, + index, + type, + SideEffects::ArrayReadOfType(type), + dex_pc, + /* is_string_char_at= */ false) { } HArrayGet(HInstruction* array, @@ -6533,10 +6570,10 @@ class HArrayGet final : public HExpression<2> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { + bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override { // TODO: We can be smarter here. // Currently, unless the array is the result of NewArray, the array access is always // preceded by some form of null NullCheck necessary for the bounds check, usually @@ -6640,7 +6677,7 @@ class HArraySet final : public HExpression<3> { // Can throw ArrayStoreException. bool CanThrow() const override { return NeedsTypeCheck(); } - bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override { + bool CanDoImplicitNullCheckOn([[maybe_unused]] HInstruction* obj) const override { // TODO: Same as for ArrayGet. 
return false; } @@ -6746,7 +6783,7 @@ class HArrayLength final : public HExpression<1> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } bool CanDoImplicitNullCheckOn(HInstruction* obj) const override { @@ -6790,7 +6827,7 @@ class HBoundsCheck final : public HExpression<2> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -7000,17 +7037,15 @@ class HLoadClass final : public HInstruction { bool CanCallRuntime() const { return NeedsAccessCheck() || MustGenerateClinitCheck() || - GetLoadKind() == LoadKind::kRuntimeCall || - GetLoadKind() == LoadKind::kBssEntry; + NeedsBss() || + GetLoadKind() == LoadKind::kRuntimeCall; } bool CanThrow() const override { return NeedsAccessCheck() || MustGenerateClinitCheck() || // If the class is in the boot image, the lookup in the runtime call cannot throw. - ((GetLoadKind() == LoadKind::kRuntimeCall || - GetLoadKind() == LoadKind::kBssEntry) && - !IsInBootImage()); + ((GetLoadKind() == LoadKind::kRuntimeCall || NeedsBss()) && !IsInBootImage()); } ReferenceTypeInfo GetLoadedClassRTI() { @@ -7423,7 +7458,7 @@ class HClinitCheck final : public HExpression<1> { } // TODO: Make ClinitCheck clonable. bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -8343,7 +8378,7 @@ class HSelect final : public HExpression<3> { HInstruction* GetCondition() const { return InputAt(2); } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } @@ -8351,6 +8386,12 @@ class HSelect final : public HExpression<3> { return GetTrueValue()->CanBeNull() || GetFalseValue()->CanBeNull(); } + void UpdateType() { + DCHECK_EQ(HPhi::ToPhiType(GetTrueValue()->GetType()), + HPhi::ToPhiType(GetFalseValue()->GetType())); + SetPackedField<TypeField>(HPhi::ToPhiType(GetTrueValue()->GetType())); + } + DECLARE_INSTRUCTION(Select); protected: @@ -8513,7 +8554,7 @@ class HIntermediateAddress final : public HExpression<2> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } bool IsActualObject() const override { return false; } @@ -8550,7 +8591,7 @@ class HGraphVisitor : public ValueObject { graph_(graph) {} virtual ~HGraphVisitor() {} - virtual void VisitInstruction(HInstruction* instruction ATTRIBUTE_UNUSED) {} + virtual void VisitInstruction([[maybe_unused]] HInstruction* instruction) {} virtual void VisitBasicBlock(HBasicBlock* block); // Visit the graph following basic block insertion order. 
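Most of the churn in the nodes.h hunks above is the mechanical move from the toolchain-specific ATTRIBUTE_UNUSED macro to the standard C++17 [[maybe_unused]] attribute on parameters that an override deliberately ignores. A minimal, self-contained sketch of the pattern, using hypothetical Base/Derived types rather than the real HInstruction hierarchy:

#include <iostream>

// Hypothetical stand-ins for an interface whose overrides do not need every parameter.
struct Base {
  virtual ~Base() = default;
  virtual bool Equals([[maybe_unused]] const Base* other) const { return true; }
};

struct Derived : Base {
  // [[maybe_unused]] is the portable C++17 way to silence -Wunused-parameter,
  // replacing the compiler-specific ATTRIBUTE_UNUSED macro.
  bool Equals([[maybe_unused]] const Base* other) const override { return true; }
};

int main() {
  Derived d;
  std::cout << std::boolalpha << d.Equals(nullptr) << '\n';  // prints "true"
  return 0;
}

Unlike the macro, the attribute is understood by any conforming compiler, so no per-toolchain definition is needed.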
@@ -8623,7 +8664,7 @@ class CloneAndReplaceInstructionVisitor final : public HGraphDelegateVisitor { DISALLOW_COPY_AND_ASSIGN(CloneAndReplaceInstructionVisitor); }; -// Iterator over the blocks that art part of the loop. Includes blocks part +// Iterator over the blocks that are part of the loop; includes blocks which are part // of an inner loop. The order in which the blocks are iterated is on their // block id. class HBlocksInLoopIterator : public ValueObject { @@ -8656,7 +8697,7 @@ class HBlocksInLoopIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopIterator); }; -// Iterator over the blocks that art part of the loop. Includes blocks part +// Iterator over the blocks that are part of the loop; includes blocks which are part // of an inner loop. The order in which the blocks are iterated is reverse // post order. class HBlocksInLoopReversePostOrderIterator : public ValueObject { @@ -8689,6 +8730,39 @@ class HBlocksInLoopReversePostOrderIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopReversePostOrderIterator); }; +// Iterator over the blocks that are part of the loop; includes blocks which are part +// of an inner loop. The order in which the blocks are iterated is post order. +class HBlocksInLoopPostOrderIterator : public ValueObject { + public: + explicit HBlocksInLoopPostOrderIterator(const HLoopInformation& info) + : blocks_in_loop_(info.GetBlocks()), + blocks_(info.GetHeader()->GetGraph()->GetReversePostOrder()), + index_(blocks_.size() - 1) { + if (!blocks_in_loop_.IsBitSet(blocks_[index_]->GetBlockId())) { + Advance(); + } + } + + bool Done() const { return index_ < 0; } + HBasicBlock* Current() const { return blocks_[index_]; } + void Advance() { + --index_; + for (; index_ >= 0; --index_) { + if (blocks_in_loop_.IsBitSet(blocks_[index_]->GetBlockId())) { + break; + } + } + } + + private: + const BitVector& blocks_in_loop_; + const ArenaVector<HBasicBlock*>& blocks_; + + int32_t index_; + + DISALLOW_COPY_AND_ASSIGN(HBlocksInLoopPostOrderIterator); +}; + // Returns int64_t value of a properly typed constant. inline int64_t Int64FromConstant(HConstant* constant) { if (constant->IsIntConstant()) { @@ -8752,10 +8826,18 @@ inline bool IsZeroBitPattern(HInstruction* instruction) { #define INSTRUCTION_TYPE_CAST(type, super) \ inline const H##type* HInstruction::As##type() const { \ - return Is##type() ? down_cast<const H##type*>(this) : nullptr; \ + DCHECK(Is##type()); \ + return down_cast<const H##type*>(this); \ } \ inline H##type* HInstruction::As##type() { \ - return Is##type() ? static_cast<H##type*>(this) : nullptr; \ + DCHECK(Is##type()); \ + return down_cast<H##type*>(this); \ + } \ + inline const H##type* HInstruction::As##type##OrNull() const { \ + return Is##type() ? down_cast<const H##type*>(this) : nullptr; \ + } \ + inline H##type* HInstruction::As##type##OrNull() { \ + return Is##type() ? 
down_cast<H##type*>(this) : nullptr; \ } FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST) diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index 27e610328f..4b0187d536 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -105,13 +105,13 @@ class HBitwiseNegatedRight final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -160,7 +160,7 @@ class HIntermediateAddressIndex final : public HExpression<3> { bool IsClonable() const override { return true; } bool CanBeMoved() const override { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override { + bool InstructionDataEquals([[maybe_unused]] const HInstruction* other) const override { return true; } bool IsActualObject() const override { return false; } diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 73f6c40a0d..6a60d6be01 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -1384,8 +1384,8 @@ class HVecPredWhile final : public HVecPredSetOperation { static constexpr size_t kCondKind = HVecOperation::kNumberOfVectorOpPackedBits; static constexpr size_t kCondKindSize = MinimumBitsToStore(static_cast<size_t>(CondKind::kLast)); - static constexpr size_t kNumberOfVecPredConditionPackedBits = kCondKind + kCondKindSize; - static_assert(kNumberOfVecPredConditionPackedBits <= kMaxNumberOfPackedBits, + static constexpr size_t kNumberOfVecPredWhilePackedBits = kCondKind + kCondKindSize; + static_assert(kNumberOfVecPredWhilePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using CondKindField = BitField<CondKind, kCondKind, kCondKindSize>; @@ -1395,13 +1395,13 @@ class HVecPredWhile final : public HVecPredSetOperation { // Evaluates the predicate condition (PCondKind) for a vector predicate; outputs // a scalar boolean value result. // -// Note: as VecPredCondition can be also predicated, only active elements (determined by the +// Note: as VecPredToBoolean can be also predicated, only active elements (determined by the // instruction's governing predicate) of the input vector predicate are used for condition // evaluation. // // Note: this instruction is currently used as a workaround for the fact that IR instructions // can't have more than one output. -class HVecPredCondition final : public HVecOperation { +class HVecPredToBoolean final : public HVecOperation { public: // To get more info on the condition kinds please see "2.2 Process state, PSTATE" section of // "ARM Architecture Reference Manual Supplement. 
The Scalable Vector Extension (SVE), @@ -1418,13 +1418,13 @@ class HVecPredCondition final : public HVecOperation { kEnumLast = kPLast }; - HVecPredCondition(ArenaAllocator* allocator, + HVecPredToBoolean(ArenaAllocator* allocator, HInstruction* input, PCondKind pred_cond, DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecOperation(kVecPredCondition, + : HVecOperation(kVecPredToBoolean, allocator, packed_type, SideEffects::None(), @@ -1447,19 +1447,86 @@ class HVecPredCondition final : public HVecOperation { return GetPackedField<CondKindField>(); } - DECLARE_INSTRUCTION(VecPredCondition); + DECLARE_INSTRUCTION(VecPredToBoolean); protected: // Additional packed bits. static constexpr size_t kCondKind = HVecOperation::kNumberOfVectorOpPackedBits; static constexpr size_t kCondKindSize = MinimumBitsToStore(static_cast<size_t>(PCondKind::kEnumLast)); - static constexpr size_t kNumberOfVecPredConditionPackedBits = kCondKind + kCondKindSize; - static_assert(kNumberOfVecPredConditionPackedBits <= kMaxNumberOfPackedBits, + static constexpr size_t kNumberOfVecPredToBooleanPackedBits = kCondKind + kCondKindSize; + static_assert(kNumberOfVecPredToBooleanPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using CondKindField = BitField<PCondKind, kCondKind, kCondKindSize>; - DEFAULT_COPY_CONSTRUCTOR(VecPredCondition); + DEFAULT_COPY_CONSTRUCTOR(VecPredToBoolean); +}; + +// Evaluates condition for pairwise elements in two input vectors and sets the result +// as an output predicate vector. +// +// viz. [ p1, .. , pn ] = [ x1 OP y1 , x2 OP y2, .. , xn OP yn] where OP is CondKind +// condition. +// +// Currently only kEqual is supported by this vector instruction - we don't even define +// the kCondType here. +// TODO: support other condition ops. +class HVecCondition final : public HVecPredSetOperation { + public: + HVecCondition(ArenaAllocator* allocator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) : + HVecPredSetOperation(kVecCondition, + allocator, + packed_type, + SideEffects::None(), + /* number_of_inputs= */ 2, + vector_length, + dex_pc) { + DCHECK(left->IsVecOperation()); + DCHECK(!left->IsVecPredSetOperation()); + DCHECK(right->IsVecOperation()); + DCHECK(!right->IsVecPredSetOperation()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + + DECLARE_INSTRUCTION(VecCondition); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecCondition); +}; + +// Inverts every component in the predicate vector. +// +// viz. [ p1, .. , pn ] = [ !px1 , !px2 , .. , !pxn ]. 
+class HVecPredNot final : public HVecPredSetOperation { + public: + HVecPredNot(ArenaAllocator* allocator, + HInstruction* input, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) : + HVecPredSetOperation(kVecPredNot, + allocator, + packed_type, + SideEffects::None(), + /* number_of_inputs= */ 1, + vector_length, + dex_pc) { + DCHECK(input->IsVecOperation()); + DCHECK(input->IsVecPredSetOperation()); + + SetRawInputAt(0, input); + } + + DECLARE_INSTRUCTION(VecPredNot); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecPredNot); }; } // namespace art diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index e246390aa5..14d9823355 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -149,13 +149,13 @@ class HX86AndNot final : public HBinaryOperation { return GetBlock()->GetGraph()->GetLongConstant( Compute(x->GetValue(), y->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED, - HFloatConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x, + [[maybe_unused]] HFloatConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED, - HDoubleConstant* y ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x, + [[maybe_unused]] HDoubleConstant* y) const override { LOG(FATAL) << DebugName() << " is not defined for double values"; UNREACHABLE(); } @@ -196,11 +196,11 @@ class HX86MaskOrResetLeastSetBit final : public HUnaryOperation { HConstant* Evaluate(HLongConstant* x) const override { return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc()); } - HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HFloatConstant* x) const override { LOG(FATAL) << DebugName() << "is not defined for float values"; UNREACHABLE(); } - HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override { + HConstant* Evaluate([[maybe_unused]] HDoubleConstant* x) const override { LOG(FATAL) << DebugName() << "is not defined for double values"; UNREACHABLE(); } diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index 12e9a1046d..4f20b55c7e 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -313,8 +313,8 @@ ArenaVector<HOptimization*> ConstructOptimizations( opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats); break; case OptimizationPass::kInstructionSimplifierX86: - opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats); - break; + opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats); + break; #endif #ifdef ART_ENABLE_CODEGEN_x86_64 case OptimizationPass::kInstructionSimplifierX86_64: diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index f12e748941..632c32a70b 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -89,7 +89,7 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { void Finish() { code_gen_->GenerateFrameExit(); - code_gen_->Finalize(&code_allocator_); + code_gen_->Finalize(); } void Check(InstructionSet isa, @@ -97,7 +97,7 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { const std::vector<uint8_t>& expected_asm, const 
std::vector<uint8_t>& expected_cfi) { // Get the outputs. - ArrayRef<const uint8_t> actual_asm = code_allocator_.GetMemory(); + ArrayRef<const uint8_t> actual_asm = code_gen_->GetCode(); Assembler* opt_asm = code_gen_->GetAssembler(); ArrayRef<const uint8_t> actual_cfi(*(opt_asm->cfi().data())); @@ -123,27 +123,9 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { } private: - class InternalCodeAllocator : public CodeAllocator { - public: - InternalCodeAllocator() {} - - uint8_t* Allocate(size_t size) override { - memory_.resize(size); - return memory_.data(); - } - - ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); } - - private: - std::vector<uint8_t> memory_; - - DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); - }; - HGraph* graph_; std::unique_ptr<CodeGenerator> code_gen_; ArenaVector<HBasicBlock*> blocks_; - InternalCodeAllocator code_allocator_; }; #define TEST_ISA(isa) \ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 00eb6e5c42..040c2449a7 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -69,28 +69,6 @@ static constexpr size_t kArenaAllocatorMemoryReportThreshold = 8 * MB; static constexpr const char* kPassNameSeparator = "$"; /** - * Used by the code generator, to allocate the code in a vector. - */ -class CodeVectorAllocator final : public CodeAllocator { - public: - explicit CodeVectorAllocator(ArenaAllocator* allocator) - : memory_(allocator->Adapter(kArenaAllocCodeBuffer)) {} - - uint8_t* Allocate(size_t size) override { - memory_.resize(size); - return &memory_[0]; - } - - ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); } - uint8_t* GetData() { return memory_.data(); } - - private: - ArenaVector<uint8_t> memory_; - - DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator); -}; - -/** * Filter to apply to the visualizer. Methods whose name contain that filter will * be dumped. */ @@ -361,7 +339,6 @@ class OptimizingCompiler final : public Compiler { // Create a 'CompiledMethod' for an optimized graph. CompiledMethod* Emit(ArenaAllocator* allocator, - CodeVectorAllocator* code_allocator, CodeGenerator* codegen, bool is_intrinsic, const dex::CodeItem* item) const; @@ -372,10 +349,8 @@ class OptimizingCompiler final : public Compiler { // 1) Builds the graph. Returns null if it failed to build it. // 2) Transforms the graph to SSA. Returns null if it failed. // 3) Runs optimizations on the graph, including register allocator. - // 4) Generates code with the `code_allocator` provided. 
CodeGenerator* TryCompile(ArenaAllocator* allocator, ArenaStack* arena_stack, - CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, CompilationKind compilation_kind, @@ -383,7 +358,6 @@ class OptimizingCompiler final : public Compiler { CodeGenerator* TryCompileIntrinsic(ArenaAllocator* allocator, ArenaStack* arena_stack, - CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, VariableSizedHandleScope* handles) const; @@ -440,24 +414,33 @@ void OptimizingCompiler::DumpInstructionSetFeaturesToCfg() const { std::string isa_string = std::string("isa:") + GetInstructionSetString(features->GetInstructionSet()); std::string features_string = "isa_features:" + features->GetFeatureString(); + std::string read_barrier_type = "none"; + if (gUseReadBarrier) { + if (art::kUseBakerReadBarrier) + read_barrier_type = "baker"; + else if (art::kUseTableLookupReadBarrier) + read_barrier_type = "tablelookup"; + } + std::string read_barrier_string = ART_FORMAT("read_barrier_type:{}", read_barrier_type); // It is assumed that visualizer_output_ is empty when calling this function, hence the fake // compilation block containing the ISA features will be printed at the beginning of the .cfg // file. - *visualizer_output_ - << HGraphVisualizer::InsertMetaDataAsCompilationBlock(isa_string + ' ' + features_string); + *visualizer_output_ << HGraphVisualizer::InsertMetaDataAsCompilationBlock( + isa_string + ' ' + features_string + ' ' + read_barrier_string); } -bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED, - const DexFile& dex_file ATTRIBUTE_UNUSED) const { +bool OptimizingCompiler::CanCompileMethod([[maybe_unused]] uint32_t method_idx, + [[maybe_unused]] const DexFile& dex_file) const { return true; } static bool IsInstructionSetSupported(InstructionSet instruction_set) { - return instruction_set == InstructionSet::kArm - || instruction_set == InstructionSet::kArm64 - || instruction_set == InstructionSet::kThumb2 - || instruction_set == InstructionSet::kX86 - || instruction_set == InstructionSet::kX86_64; + return instruction_set == InstructionSet::kArm || + instruction_set == InstructionSet::kArm64 || + instruction_set == InstructionSet::kThumb2 || + instruction_set == InstructionSet::kRiscv64 || + instruction_set == InstructionSet::kX86 || + instruction_set == InstructionSet::kX86_64; } bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph, @@ -469,7 +452,7 @@ bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph, case InstructionSet::kThumb2: case InstructionSet::kArm: { OptimizationDef arm_optimizations[] = { - OptDef(OptimizationPass::kCriticalNativeAbiFixupArm), + OptDef(OptimizationPass::kCriticalNativeAbiFixupArm), }; return RunOptimizations(graph, codegen, @@ -481,7 +464,7 @@ bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph, #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { OptimizationDef x86_optimizations[] = { - OptDef(OptimizationPass::kPcRelativeFixupsX86), + OptDef(OptimizationPass::kPcRelativeFixupsX86), }; return RunOptimizations(graph, codegen, @@ -508,11 +491,11 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, case InstructionSet::kThumb2: case InstructionSet::kArm: { OptimizationDef arm_optimizations[] = { - OptDef(OptimizationPass::kInstructionSimplifierArm), - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - 
OptDef(OptimizationPass::kCriticalNativeAbiFixupArm), - OptDef(OptimizationPass::kScheduling) + OptDef(OptimizationPass::kInstructionSimplifierArm), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kCriticalNativeAbiFixupArm), + OptDef(OptimizationPass::kScheduling) }; return RunOptimizations(graph, codegen, @@ -524,10 +507,10 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, #ifdef ART_ENABLE_CODEGEN_arm64 case InstructionSet::kArm64: { OptimizationDef arm64_optimizations[] = { - OptDef(OptimizationPass::kInstructionSimplifierArm64), - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kScheduling) + OptDef(OptimizationPass::kInstructionSimplifierArm64), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kScheduling) }; return RunOptimizations(graph, codegen, @@ -539,11 +522,11 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, #ifdef ART_ENABLE_CODEGEN_x86 case InstructionSet::kX86: { OptimizationDef x86_optimizations[] = { - OptDef(OptimizationPass::kInstructionSimplifierX86), - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kPcRelativeFixupsX86), - OptDef(OptimizationPass::kX86MemoryOperandGeneration) + OptDef(OptimizationPass::kInstructionSimplifierX86), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kPcRelativeFixupsX86), + OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; return RunOptimizations(graph, codegen, @@ -555,10 +538,10 @@ bool OptimizingCompiler::RunArchOptimizations(HGraph* graph, #ifdef ART_ENABLE_CODEGEN_x86_64 case InstructionSet::kX86_64: { OptimizationDef x86_64_optimizations[] = { - OptDef(OptimizationPass::kInstructionSimplifierX86_64), - OptDef(OptimizationPass::kSideEffectsAnalysis), - OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), - OptDef(OptimizationPass::kX86MemoryOperandGeneration) + OptDef(OptimizationPass::kInstructionSimplifierX86_64), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; return RunOptimizations(graph, codegen, @@ -633,68 +616,68 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph, } OptimizationDef optimizations[] = { - // Initial optimizations. - OptDef(OptimizationPass::kConstantFolding), - OptDef(OptimizationPass::kInstructionSimplifier), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$initial"), - // Inlining. - OptDef(OptimizationPass::kInliner), - // Simplification (if inlining occurred, or if we analyzed the invoke as "always throwing"). - OptDef(OptimizationPass::kConstantFolding, - "constant_folding$after_inlining", - OptimizationPass::kInliner), - OptDef(OptimizationPass::kInstructionSimplifier, - "instruction_simplifier$after_inlining", - OptimizationPass::kInliner), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$after_inlining", - OptimizationPass::kInliner), - // GVN. 
- OptDef(OptimizationPass::kSideEffectsAnalysis, - "side_effects$before_gvn"), - OptDef(OptimizationPass::kGlobalValueNumbering), - // Simplification (TODO: only if GVN occurred). - OptDef(OptimizationPass::kSelectGenerator), - OptDef(OptimizationPass::kAggressiveConstantFolding, - "constant_folding$after_gvn"), - OptDef(OptimizationPass::kInstructionSimplifier, - "instruction_simplifier$after_gvn"), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$after_gvn"), - // High-level optimizations. - OptDef(OptimizationPass::kSideEffectsAnalysis, - "side_effects$before_licm"), - OptDef(OptimizationPass::kInvariantCodeMotion), - OptDef(OptimizationPass::kInductionVarAnalysis), - OptDef(OptimizationPass::kBoundsCheckElimination), - OptDef(OptimizationPass::kLoopOptimization), - // Simplification. - OptDef(OptimizationPass::kConstantFolding, - "constant_folding$after_loop_opt"), - OptDef(OptimizationPass::kAggressiveInstructionSimplifier, - "instruction_simplifier$after_loop_opt"), - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$after_loop_opt"), - // Other high-level optimizations. - OptDef(OptimizationPass::kLoadStoreElimination), - OptDef(OptimizationPass::kCHAGuardOptimization), - OptDef(OptimizationPass::kCodeSinking), - // Simplification. - OptDef(OptimizationPass::kConstantFolding, - "constant_folding$before_codegen"), - // The codegen has a few assumptions that only the instruction simplifier - // can satisfy. For example, the code generator does not expect to see a - // HTypeConversion from a type to the same type. - OptDef(OptimizationPass::kAggressiveInstructionSimplifier, - "instruction_simplifier$before_codegen"), - // Simplification may result in dead code that should be removed prior to - // code generation. - OptDef(OptimizationPass::kDeadCodeElimination, - "dead_code_elimination$before_codegen"), - // Eliminate constructor fences after code sinking to avoid - // complicated sinking logic to split a fence with many inputs. - OptDef(OptimizationPass::kConstructorFenceRedundancyElimination) + // Initial optimizations. + OptDef(OptimizationPass::kConstantFolding), + OptDef(OptimizationPass::kInstructionSimplifier), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$initial"), + // Inlining. + OptDef(OptimizationPass::kInliner), + // Simplification (if inlining occurred, or if we analyzed the invoke as "always throwing"). + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$after_inlining", + OptimizationPass::kInliner), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$after_inlining", + OptimizationPass::kInliner), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_inlining", + OptimizationPass::kInliner), + // GVN. + OptDef(OptimizationPass::kSideEffectsAnalysis, + "side_effects$before_gvn"), + OptDef(OptimizationPass::kGlobalValueNumbering), + // Simplification (TODO: only if GVN occurred). + OptDef(OptimizationPass::kSelectGenerator), + OptDef(OptimizationPass::kAggressiveConstantFolding, + "constant_folding$after_gvn"), + OptDef(OptimizationPass::kInstructionSimplifier, + "instruction_simplifier$after_gvn"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_gvn"), + // High-level optimizations. 
+ OptDef(OptimizationPass::kSideEffectsAnalysis, + "side_effects$before_licm"), + OptDef(OptimizationPass::kInvariantCodeMotion), + OptDef(OptimizationPass::kInductionVarAnalysis), + OptDef(OptimizationPass::kBoundsCheckElimination), + OptDef(OptimizationPass::kLoopOptimization), + // Simplification. + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$after_loop_opt"), + OptDef(OptimizationPass::kAggressiveInstructionSimplifier, + "instruction_simplifier$after_loop_opt"), + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$after_loop_opt"), + // Other high-level optimizations. + OptDef(OptimizationPass::kLoadStoreElimination), + OptDef(OptimizationPass::kCHAGuardOptimization), + OptDef(OptimizationPass::kCodeSinking), + // Simplification. + OptDef(OptimizationPass::kConstantFolding, + "constant_folding$before_codegen"), + // The codegen has a few assumptions that only the instruction simplifier + // can satisfy. For example, the code generator does not expect to see a + // HTypeConversion from a type to the same type. + OptDef(OptimizationPass::kAggressiveInstructionSimplifier, + "instruction_simplifier$before_codegen"), + // Simplification may result in dead code that should be removed prior to + // code generation. + OptDef(OptimizationPass::kDeadCodeElimination, + "dead_code_elimination$before_codegen"), + // Eliminate constructor fences after code sinking to avoid + // complicated sinking logic to split a fence with many inputs. + OptDef(OptimizationPass::kConstructorFenceRedundancyElimination) }; RunOptimizations(graph, codegen, @@ -719,7 +702,6 @@ static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* } CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, - CodeVectorAllocator* code_allocator, CodeGenerator* codegen, bool is_intrinsic, const dex::CodeItem* code_item_for_osr_check) const { @@ -729,7 +711,7 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, CompiledCodeStorage* storage = GetCompiledCodeStorage(); CompiledMethod* compiled_method = storage->CreateCompiledMethod( codegen->GetInstructionSet(), - code_allocator->GetMemory(), + codegen->GetCode(), ArrayRef<const uint8_t>(stack_map), ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), ArrayRef<const linker::LinkerPatch>(linker_patches), @@ -747,9 +729,92 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, return compiled_method; } +// TODO(riscv64): Remove this check when codegen is complete. +#ifdef ART_ENABLE_CODEGEN_riscv64 +static bool CanAssembleGraphForRiscv64(HGraph* graph) { + for (HBasicBlock* block : graph->GetPostOrder()) { + // Phis are implemented (and they have no code to emit), so check only non-Phi instructions. + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + switch (it.Current()->GetKind()) { + case HInstruction::kParallelMove: + // ParallelMove is supported but it is inserted by the register allocator + // and this check is done before register allocation. 
+ LOG(FATAL) << "Unexpected ParallelMove before register allocation!"; + UNREACHABLE(); + case HInstruction::kExit: + case HInstruction::kGoto: + case HInstruction::kParameterValue: + case HInstruction::kReturn: + case HInstruction::kReturnVoid: + case HInstruction::kSuspendCheck: + case HInstruction::kDoubleConstant: + case HInstruction::kFloatConstant: + case HInstruction::kIntConstant: + case HInstruction::kLongConstant: + case HInstruction::kNullConstant: + case HInstruction::kLoadClass: + case HInstruction::kLoadString: + case HInstruction::kLoadMethodHandle: + case HInstruction::kLoadMethodType: + case HInstruction::kInstanceFieldGet: + case HInstruction::kStaticFieldGet: + case HInstruction::kArrayGet: + case HInstruction::kAbove: + case HInstruction::kAboveOrEqual: + case HInstruction::kBelow: + case HInstruction::kBelowOrEqual: + case HInstruction::kEqual: + case HInstruction::kGreaterThan: + case HInstruction::kGreaterThanOrEqual: + case HInstruction::kLessThan: + case HInstruction::kLessThanOrEqual: + case HInstruction::kNotEqual: + case HInstruction::kCompare: + case HInstruction::kIf: + case HInstruction::kAdd: + case HInstruction::kAnd: + case HInstruction::kOr: + case HInstruction::kSub: + case HInstruction::kXor: + case HInstruction::kRor: + case HInstruction::kShl: + case HInstruction::kShr: + case HInstruction::kUShr: + case HInstruction::kAbs: + case HInstruction::kBooleanNot: + case HInstruction::kMul: + case HInstruction::kNeg: + case HInstruction::kNot: + case HInstruction::kMin: + case HInstruction::kMax: + case HInstruction::kInvokeVirtual: + case HInstruction::kInvokeInterface: + case HInstruction::kCurrentMethod: + case HInstruction::kNullCheck: + break; + case HInstruction::kInvokeStaticOrDirect: + if (it.Current()->AsInvokeStaticOrDirect()->GetCodePtrLocation() == + CodePtrLocation::kCallCriticalNative && + it.Current()->AsInvokeStaticOrDirect()->GetNumberOfArguments() >= 8u) { + // TODO(riscv64): If there are more than 8 FP args, some may be passed in GPRs + // and this requires a `CriticalNativeAbiFixupRiscv64` pass similar to the one + // we have for ARM. This is not yet implemented. For simplicity, we reject all + // direct @CriticalNative calls with more than 8 args. + return false; + } + break; + default: + // Unimplemented instruction. + return false; + } + } + } + return true; +} +#endif + CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, ArenaStack* arena_stack, - CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, CompilationKind compilation_kind, @@ -906,6 +971,15 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, WriteBarrierElimination(graph, compilation_stats_.get()).Run(); } + // TODO(riscv64): Remove this check when codegen is complete. 
+#ifdef ART_ENABLE_CODEGEN_riscv64 + if (instruction_set == InstructionSet::kRiscv64 && !CanAssembleGraphForRiscv64(graph)) { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledUnsupportedIsa); + return nullptr; + } +#endif + RegisterAllocator::Strategy regalloc_strategy = compiler_options.GetRegisterAllocationStrategy(); AllocateRegisters(graph, @@ -914,7 +988,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, regalloc_strategy, compilation_stats_.get()); - codegen->Compile(code_allocator); + codegen->Compile(); pass_observer.DumpDisassembly(); MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledBytecode); @@ -924,7 +998,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( ArenaAllocator* allocator, ArenaStack* arena_stack, - CodeVectorAllocator* code_allocator, const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, VariableSizedHandleScope* handles) const { @@ -986,9 +1059,9 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( } OptimizationDef optimizations[] = { - // The codegen has a few assumptions that only the instruction simplifier - // can satisfy. - OptDef(OptimizationPass::kInstructionSimplifier), + // The codegen has a few assumptions that only the instruction simplifier + // can satisfy. + OptDef(OptimizationPass::kInstructionSimplifier), }; RunOptimizations(graph, codegen.get(), @@ -1002,6 +1075,15 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( WriteBarrierElimination(graph, compilation_stats_.get()).Run(); } + // TODO(riscv64): Remove this check when codegen is complete. +#ifdef ART_ENABLE_CODEGEN_riscv64 + if (instruction_set == InstructionSet::kRiscv64 && !CanAssembleGraphForRiscv64(graph)) { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledUnsupportedIsa); + return nullptr; + } +#endif + AllocateRegisters(graph, codegen.get(), &pass_observer, @@ -1013,7 +1095,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( return nullptr; } - codegen->Compile(code_allocator); + codegen->Compile(); pass_observer.DumpDisassembly(); VLOG(compiler) << "Compiled intrinsic: " << method->GetIntrinsic() @@ -1037,7 +1119,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, DCHECK(runtime->IsAotCompiler()); ArenaAllocator allocator(runtime->GetArenaPool()); ArenaStack arena_stack(runtime->GetArenaPool()); - CodeVectorAllocator code_allocator(&allocator); std::unique_ptr<CodeGenerator> codegen; bool compiled_intrinsic = false; { @@ -1071,7 +1152,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, codegen.reset( TryCompileIntrinsic(&allocator, &arena_stack, - &code_allocator, dex_compilation_unit, method, &handles)); @@ -1083,7 +1163,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, codegen.reset( TryCompile(&allocator, &arena_stack, - &code_allocator, dex_compilation_unit, method, compiler_options.IsBaseline() @@ -1094,7 +1173,6 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, } if (codegen.get() != nullptr) { compiled_method = Emit(&allocator, - &code_allocator, codegen.get(), compiled_intrinsic, compiled_intrinsic ? 
nullptr : code_item); @@ -1115,7 +1193,9 @@ CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item, if (kIsDebugBuild && compiler_options.CompileArtTest() && - IsInstructionSetSupported(compiler_options.GetInstructionSet())) { + IsInstructionSetSupported(compiler_options.GetInstructionSet()) && + // TODO(riscv64): Enable this check when codegen is complete. + compiler_options.GetInstructionSet() != InstructionSet::kRiscv64) { // For testing purposes, we put a special marker on method names // that should be compiled with this compiler (when the // instruction set is supported). This makes sure we're not @@ -1177,19 +1257,16 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, /*verified_method=*/ nullptr, dex_cache, compiling_class); - CodeVectorAllocator code_allocator(&allocator); // Go to native so that we don't block GC during compilation. ScopedThreadSuspension sts(soa.Self(), ThreadState::kNative); std::unique_ptr<CodeGenerator> codegen( TryCompileIntrinsic(&allocator, &arena_stack, - &code_allocator, dex_compilation_unit, method, &handles)); if (codegen != nullptr) { return Emit(&allocator, - &code_allocator, codegen.get(), /*is_intrinsic=*/ true, /*item=*/ nullptr); @@ -1221,7 +1298,7 @@ Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options, return new OptimizingCompiler(compiler_options, storage); } -bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) { +bool EncodeArtMethodInInlineInfo([[maybe_unused]] ArtMethod* method) { // Note: the runtime is null only for unit testing. return Runtime::Current() == nullptr || !Runtime::Current()->IsAotCompiler(); } @@ -1328,7 +1405,6 @@ bool OptimizingCompiler::JitCompile(Thread* self, debug_info, /* is_full_debug_info= */ compiler_options.GetGenerateDebugInfo(), compilation_kind, - /* has_should_deoptimize_flag= */ false, cha_single_implementation_list)) { code_cache->Free(self, region, reserved_code.data(), reserved_data.data()); return false; @@ -1342,7 +1418,6 @@ bool OptimizingCompiler::JitCompile(Thread* self, } ArenaStack arena_stack(runtime->GetJitArenaPool()); - CodeVectorAllocator code_allocator(&allocator); VariableSizedHandleScope handles(self); std::unique_ptr<CodeGenerator> codegen; @@ -1365,7 +1440,6 @@ bool OptimizingCompiler::JitCompile(Thread* self, codegen.reset( TryCompile(&allocator, &arena_stack, - &code_allocator, dex_compilation_unit, method, compilation_kind, @@ -1381,7 +1455,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, ArrayRef<const uint8_t> reserved_data; if (!code_cache->Reserve(self, region, - code_allocator.GetMemory().size(), + codegen->GetAssembler()->CodeSize(), stack_map.size(), /*number_of_roots=*/codegen->GetNumberOfJitRoots(), method, @@ -1394,7 +1468,9 @@ bool OptimizingCompiler::JitCompile(Thread* self, const uint8_t* roots_data = reserved_data.data(); std::vector<Handle<mirror::Object>> roots; - codegen->EmitJitRoots(code_allocator.GetData(), roots_data, &roots); + codegen->EmitJitRoots(const_cast<uint8_t*>(codegen->GetAssembler()->CodeBufferBaseAddress()), + roots_data, + &roots); // The root Handle<>s filled by the codegen reference entries in the VariableSizedHandleScope. 
DCHECK(std::all_of(roots.begin(), roots.end(), @@ -1418,7 +1494,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.is_optimized = true; info.is_code_address_text_relative = false; info.code_address = reinterpret_cast<uintptr_t>(code); - info.code_size = code_allocator.GetMemory().size(); + info.code_size = codegen->GetAssembler()->CodeSize(), info.frame_size_in_bytes = codegen->GetFrameSize(); info.code_info = stack_map.size() == 0 ? nullptr : stack_map.data(); info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); @@ -1429,22 +1505,23 @@ bool OptimizingCompiler::JitCompile(Thread* self, region, method, reserved_code, - code_allocator.GetMemory(), + codegen->GetCode(), reserved_data, roots, ArrayRef<const uint8_t>(stack_map), debug_info, /* is_full_debug_info= */ compiler_options.GetGenerateDebugInfo(), compilation_kind, - codegen->GetGraph()->HasShouldDeoptimizeFlag(), codegen->GetGraph()->GetCHASingleImplementationList())) { + CHECK_EQ(CodeInfo::HasShouldDeoptimizeFlag(stack_map.data()), + codegen->GetGraph()->HasShouldDeoptimizeFlag()); code_cache->Free(self, region, reserved_code.data(), reserved_data.data()); return false; } Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); if (jit_logger != nullptr) { - jit_logger->WriteLog(code, code_allocator.GetMemory().size(), method); + jit_logger->WriteLog(code, codegen->GetAssembler()->CodeSize(), method); } if (kArenaAllocatorCountAllocations) { diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index a1c05e9cad..d2b993280d 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -81,8 +81,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { message_ << ")"; } - void SpillScratch(int reg ATTRIBUTE_UNUSED) override {} - void RestoreScratch(int reg ATTRIBUTE_UNUSED) override {} + void SpillScratch([[maybe_unused]] int reg) override {} + void RestoreScratch([[maybe_unused]] int reg) override {} std::string GetMessage() const { return message_.str(); @@ -126,7 +126,7 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { return scratch; } - void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) override {} + void FreeScratchLocation([[maybe_unused]] Location loc) override {} void EmitMove(size_t index) override { MoveOperands* move = moves_[index]; diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index d3da3d3ce1..56341f106f 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -62,7 +62,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { } void VisitReturn(HReturn* ret) override { - HConstant* value = ret->InputAt(0)->AsConstant(); + HConstant* value = ret->InputAt(0)->AsConstantOrNull(); if ((value != nullptr && DataType::IsFloatingPointType(value->GetType()))) { ReplaceInput(ret, value, 0, true); } @@ -95,7 +95,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { } void BinaryFP(HBinaryOperation* bin) { - HConstant* rhs = bin->InputAt(1)->AsConstant(); + HConstant* rhs = bin->InputAt(1)->AsConstantOrNull(); if (rhs != nullptr && DataType::IsFloatingPointType(rhs->GetType())) { ReplaceInput(bin, rhs, 1, false); } @@ -193,7 +193,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { } void HandleInvoke(HInvoke* invoke) { - HInvokeStaticOrDirect* invoke_static_or_direct = 
invoke->AsInvokeStaticOrDirect(); + HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirectOrNull(); // If this is an invoke-static/-direct with PC-relative addressing (within boot image // or using .bss or .data.bimg.rel.ro), we need the PC-relative address base. @@ -207,7 +207,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { base_added = true; } - HInvokeInterface* invoke_interface = invoke->AsInvokeInterface(); + HInvokeInterface* invoke_interface = invoke->AsInvokeInterfaceOrNull(); if (invoke_interface != nullptr && IsPcRelativeMethodLoadKind(invoke_interface->GetHiddenArgumentLoadKind())) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke); @@ -219,7 +219,7 @@ class PCRelativeHandlerVisitor final : public HGraphVisitor { // Ensure that we can load FP arguments from the constant area. HInputsRef inputs = invoke->GetInputs(); for (size_t i = 0; i < inputs.size(); i++) { - HConstant* input = inputs[i]->AsConstant(); + HConstant* input = inputs[i]->AsConstantOrNull(); if (input != nullptr && DataType::IsFloatingPointType(input->GetType())) { ReplaceInput(invoke, input, i, true); } diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 91bae5f49b..3a5cceed9a 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -254,7 +254,7 @@ static void BoundTypeForClassCheck(HInstruction* check) { HInstruction* input_two = compare->InputAt(1); HLoadClass* load_class = input_one->IsLoadClass() ? input_one->AsLoadClass() - : input_two->AsLoadClass(); + : input_two->AsLoadClassOrNull(); if (load_class == nullptr) { return; } @@ -335,7 +335,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitBasicBlock(HBasicBlock* block) { } void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfNotNull(HBasicBlock* block) { - HIf* ifInstruction = block->GetLastInstruction()->AsIf(); + HIf* ifInstruction = block->GetLastInstruction()->AsIfOrNull(); if (ifInstruction == nullptr) { return; } @@ -453,7 +453,7 @@ static bool MatchIfInstanceOf(HIf* ifInstruction, // If that's the case insert an HBoundType instruction to bound the type of `x` // to `ClassX` in the scope of the dominated blocks. void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* block) { - HIf* ifInstruction = block->GetLastInstruction()->AsIf(); + HIf* ifInstruction = block->GetLastInstruction()->AsIfOrNull(); if (ifInstruction == nullptr) { return; } @@ -539,9 +539,14 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* DCHECK_EQ(instr->GetType(), DataType::Type::kReference); ScopedObjectAccess soa(Thread::Current()); - ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_); - ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType( - type_idx, dex_cache, dex_cache->GetClassLoader()); + StackHandleScope<2> hs(soa.Self()); + Handle<mirror::DexCache> dex_cache = + hs.NewHandle(FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_)); + Handle<mirror::ClassLoader> loader = hs.NewHandle(dex_cache->GetClassLoader()); + ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->ResolveType( + type_idx, dex_cache, loader); + DCHECK_EQ(klass == nullptr, soa.Self()->IsExceptionPending()); + soa.Self()->ClearException(); // Clean up the exception left by type resolution if any. 
SetClassAsTypeInfo(instr, klass, is_exact); } @@ -704,7 +709,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) { } void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) { - HBoundType* bound_type = check_cast->GetNext()->AsBoundType(); + HBoundType* bound_type = check_cast->GetNext()->AsBoundTypeOrNull(); if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) { // The next instruction is not an uninitialized BoundType. This must be // an RTP pass after SsaBuilder and we do not need to do anything. diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc index 2b012fcd67..ffd94e56b5 100644 --- a/compiler/optimizing/reference_type_propagation_test.cc +++ b/compiler/optimizing/reference_type_propagation_test.cc @@ -468,7 +468,7 @@ TEST_P(LoopReferenceTypePropagationTestGroup, RunVisitTest) { LoopOptions lo(GetParam()); std::default_random_engine g( lo.initial_null_state_ != InitialNullState::kTrueRandom ? 42 : std::rand()); - std::uniform_int_distribution<bool> uid(false, true); + std::uniform_int_distribution<int> uid(0, 1); RunVisitListTest([&](std::vector<HInstruction*>& lst, HInstruction* null_input) { auto pred_null = false; auto next_null = [&]() { @@ -482,7 +482,7 @@ TEST_P(LoopReferenceTypePropagationTestGroup, RunVisitTest) { return pred_null; case InitialNullState::kRandomSetSeed: case InitialNullState::kTrueRandom: - return uid(g); + return uid(g) > 0; } }; HPhi* nulled_phi = lo.null_insertion_ >= 0 ? lst[lo.null_insertion_]->AsPhi() : nullptr; diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index 53e11f2c3d..a4b1698b8d 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -531,9 +531,9 @@ void RegisterAllocationResolver::AddInputMoveFor(HInstruction* input, HInstruction* previous = user->GetPrevious(); HParallelMove* move = nullptr; - if (previous == nullptr - || !previous->IsParallelMove() - || previous->GetLifetimePosition() < user->GetLifetimePosition()) { + if (previous == nullptr || + !previous->IsParallelMove() || + previous->GetLifetimePosition() < user->GetLifetimePosition()) { move = new (allocator_) HParallelMove(allocator_); move->SetLifetimePosition(user->GetLifetimePosition()); user->GetBlock()->InsertInstructionBefore(move, user); @@ -593,7 +593,7 @@ void RegisterAllocationResolver::InsertParallelMoveAt(size_t position, } else if (IsInstructionEnd(position)) { // Move must happen after the instruction. DCHECK(!at->IsControlFlow()); - move = at->GetNext()->AsParallelMove(); + move = at->GetNext()->AsParallelMoveOrNull(); // This is a parallel move for connecting siblings in a same block. We need to // differentiate it with moves for connecting blocks, and input moves. if (move == nullptr || move->GetLifetimePosition() > position) { @@ -604,15 +604,15 @@ void RegisterAllocationResolver::InsertParallelMoveAt(size_t position, } else { // Move must happen before the instruction. 
HInstruction* previous = at->GetPrevious(); - if (previous == nullptr - || !previous->IsParallelMove() - || previous->GetLifetimePosition() != position) { + if (previous == nullptr || + !previous->IsParallelMove() || + previous->GetLifetimePosition() != position) { // If the previous is a parallel move, then its position must be lower // than the given `position`: it was added just after the non-parallel // move instruction that precedes `instruction`. - DCHECK(previous == nullptr - || !previous->IsParallelMove() - || previous->GetLifetimePosition() < position); + DCHECK(previous == nullptr || + !previous->IsParallelMove() || + previous->GetLifetimePosition() < position); move = new (allocator_) HParallelMove(allocator_); move->SetLifetimePosition(position); at->GetBlock()->InsertInstructionBefore(move, at); @@ -643,8 +643,9 @@ void RegisterAllocationResolver::InsertParallelMoveAtExitOf(HBasicBlock* block, // This is a parallel move for connecting blocks. We need to differentiate // it with moves for connecting siblings in a same block, and output moves. size_t position = last->GetLifetimePosition(); - if (previous == nullptr || !previous->IsParallelMove() - || previous->AsParallelMove()->GetLifetimePosition() != position) { + if (previous == nullptr || + !previous->IsParallelMove() || + previous->AsParallelMove()->GetLifetimePosition() != position) { move = new (allocator_) HParallelMove(allocator_); move->SetLifetimePosition(position); block->InsertInstructionBefore(move, last); @@ -662,7 +663,7 @@ void RegisterAllocationResolver::InsertParallelMoveAtEntryOf(HBasicBlock* block, if (source.Equals(destination)) return; HInstruction* first = block->GetFirstInstruction(); - HParallelMove* move = first->AsParallelMove(); + HParallelMove* move = first->AsParallelMoveOrNull(); size_t position = block->GetLifetimeStart(); // This is a parallel move for connecting blocks. We need to differentiate // it with moves for connecting siblings in a same block, and input moves. @@ -686,7 +687,7 @@ void RegisterAllocationResolver::InsertMoveAfter(HInstruction* instruction, } size_t position = instruction->GetLifetimePosition() + 1; - HParallelMove* move = instruction->GetNext()->AsParallelMove(); + HParallelMove* move = instruction->GetNext()->AsParallelMoveOrNull(); // This is a parallel move for moving the output of an instruction. We need // to differentiate with input moves, moves for connecting siblings in a // and moves for connecting blocks. 
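The call-site changes above (pc_relative_fixups_x86.cc, reference_type_propagation.cc, register_allocation_resolver.cc) follow a single pattern: `As<Type>()` is replaced by `As<Type>OrNull()` wherever the result is immediately compared against nullptr. A minimal standalone sketch of the two accessor styles, assuming the `OrNull` variant keeps the old null-returning behavior while the plain form expects the exact kind; the types and free functions below are illustrative stand-ins, not the real HInstruction hierarchy in compiler/optimizing/nodes.h:

#include <cassert>
#include <initializer_list>
#include <iostream>

// Stand-in types for illustration only.
struct Instruction {
  virtual ~Instruction() = default;
  virtual bool IsParallelMove() const { return false; }
};

struct ParallelMove : Instruction {
  bool IsParallelMove() const override { return true; }
};

// Plain accessor: the caller promises the kind is right (checked in debug builds).
ParallelMove* AsParallelMove(Instruction* instruction) {
  assert(instruction->IsParallelMove());
  return static_cast<ParallelMove*>(instruction);
}

// "OrNull" accessor: legal on any instruction; the caller handles nullptr.
ParallelMove* AsParallelMoveOrNull(Instruction* instruction) {
  return instruction->IsParallelMove() ? static_cast<ParallelMove*>(instruction) : nullptr;
}

int main() {
  ParallelMove move;
  Instruction other;
  // The rewritten call sites use the second form: fetch-or-null, then branch.
  for (Instruction* instruction : {static_cast<Instruction*>(&move), &other}) {
    if (ParallelMove* parallel_move = AsParallelMoveOrNull(instruction)) {
      std::cout << "parallel move at " << parallel_move << "\n";
    } else {
      std::cout << "not a parallel move\n";
    }
  }
  return 0;
}
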
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index e4c2d74908..f8b057d4a8 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -23,7 +23,6 @@ #include "base/scoped_arena_containers.h" #include "base/bit_vector-inl.h" #include "code_generator.h" -#include "register_allocator_graph_color.h" #include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" @@ -45,8 +44,8 @@ std::unique_ptr<RegisterAllocator> RegisterAllocator::Create(ScopedArenaAllocato return std::unique_ptr<RegisterAllocator>( new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis)); case kRegisterAllocatorGraphColor: - return std::unique_ptr<RegisterAllocator>( - new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis)); + LOG(FATAL) << "Graph coloring register allocator has been removed."; + UNREACHABLE(); default: LOG(FATAL) << "Invalid register allocation strategy: " << strategy; UNREACHABLE(); diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc deleted file mode 100644 index a7c891d4e7..0000000000 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ /dev/null @@ -1,2086 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "register_allocator_graph_color.h" - -#include "code_generator.h" -#include "linear_order.h" -#include "register_allocation_resolver.h" -#include "ssa_liveness_analysis.h" -#include "thread-current-inl.h" - -namespace art HIDDEN { - -// Highest number of registers that we support for any platform. This can be used for std::bitset, -// for example, which needs to know its size at compile time. -static constexpr size_t kMaxNumRegs = 32; - -// The maximum number of graph coloring attempts before triggering a DCHECK. -// This is meant to catch changes to the graph coloring algorithm that undermine its forward -// progress guarantees. Forward progress for the algorithm means splitting live intervals on -// every graph coloring attempt so that eventually the interference graph will be sparse enough -// to color. The main threat to forward progress is trying to split short intervals which cannot be -// split further; this could cause infinite looping because the interference graph would never -// change. This is avoided by prioritizing short intervals before long ones, so that long -// intervals are split when coloring fails. -static constexpr size_t kMaxGraphColoringAttemptsDebug = 100; - -// We always want to avoid spilling inside loops. -static constexpr size_t kLoopSpillWeightMultiplier = 10; - -// If we avoid moves in single jump blocks, we can avoid jumps to jumps. -static constexpr size_t kSingleJumpBlockWeightMultiplier = 2; - -// We avoid moves in blocks that dominate the exit block, since these blocks will -// be executed on every path through the method. 
-static constexpr size_t kDominatesExitBlockWeightMultiplier = 2; - -enum class CoalesceKind { - kAdjacentSibling, // Prevents moves at interval split points. - kFixedOutputSibling, // Prevents moves from a fixed output location. - kFixedInput, // Prevents moves into a fixed input location. - kNonlinearControlFlow, // Prevents moves between blocks. - kPhi, // Prevents phi resolution moves. - kFirstInput, // Prevents a single input move. - kAnyInput, // May lead to better instruction selection / smaller encodings. -}; - -std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) { - return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind); -} - -static size_t LoopDepthAt(HBasicBlock* block) { - HLoopInformation* loop_info = block->GetLoopInformation(); - size_t depth = 0; - while (loop_info != nullptr) { - ++depth; - loop_info = loop_info->GetPreHeader()->GetLoopInformation(); - } - return depth; -} - -// Return the runtime cost of inserting a move instruction at the specified location. -static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) { - HBasicBlock* block = liveness.GetBlockFromPosition(position / 2); - DCHECK(block != nullptr); - size_t cost = 1; - if (block->IsSingleJump()) { - cost *= kSingleJumpBlockWeightMultiplier; - } - if (block->Dominates(block->GetGraph()->GetExitBlock())) { - cost *= kDominatesExitBlockWeightMultiplier; - } - for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) { - cost *= kLoopSpillWeightMultiplier; - } - return cost; -} - -// In general, we estimate coalesce priority by whether it will definitely avoid a move, -// and by how likely it is to create an interference graph that's harder to color. -static size_t ComputeCoalescePriority(CoalesceKind kind, - size_t position, - const SsaLivenessAnalysis& liveness) { - if (kind == CoalesceKind::kAnyInput) { - // This type of coalescing can affect instruction selection, but not moves, so we - // give it the lowest priority. - return 0; - } else { - return CostForMoveAt(position, liveness); - } -} - -enum class CoalesceStage { - kWorklist, // Currently in the iterative coalescing worklist. - kActive, // Not in a worklist, but could be considered again during iterative coalescing. - kInactive, // No longer considered until last-chance coalescing. - kDefunct, // Either the two nodes interfere, or have already been coalesced. -}; - -std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) { - return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage); -} - -// Represents a coalesce opportunity between two nodes. -struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> { - CoalesceOpportunity(InterferenceNode* a, - InterferenceNode* b, - CoalesceKind kind, - size_t position, - const SsaLivenessAnalysis& liveness) - : node_a(a), - node_b(b), - stage(CoalesceStage::kWorklist), - priority(ComputeCoalescePriority(kind, position, liveness)) {} - - // Compare two coalesce opportunities based on their priority. - // Return true if lhs has a lower priority than that of rhs. - static bool CmpPriority(const CoalesceOpportunity* lhs, - const CoalesceOpportunity* rhs) { - return lhs->priority < rhs->priority; - } - - InterferenceNode* const node_a; - InterferenceNode* const node_b; - - // The current stage of this coalesce opportunity, indicating whether it is in a worklist, - // and whether it should still be considered. 
- CoalesceStage stage; - - // The priority of this coalesce opportunity, based on heuristics. - const size_t priority; -}; - -enum class NodeStage { - kInitial, // Uninitialized. - kPrecolored, // Marks fixed nodes. - kSafepoint, // Marks safepoint nodes. - kPrunable, // Marks uncolored nodes in the interference graph. - kSimplifyWorklist, // Marks non-move-related nodes with degree less than the number of registers. - kFreezeWorklist, // Marks move-related nodes with degree less than the number of registers. - kSpillWorklist, // Marks nodes with degree greater or equal to the number of registers. - kPruned // Marks nodes already pruned from the interference graph. -}; - -std::ostream& operator<<(std::ostream& os, const NodeStage& stage) { - return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage); -} - -// Returns the estimated cost of spilling a particular live interval. -static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) { - if (interval->HasRegister()) { - // Intervals with a fixed register cannot be spilled. - return std::numeric_limits<float>::min(); - } - - size_t length = interval->GetLength(); - if (length == 1) { - // Tiny intervals should have maximum priority, since they cannot be split any further. - return std::numeric_limits<float>::max(); - } - - size_t use_weight = 0; - if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) { - // Cost for spilling at a register definition point. - use_weight += CostForMoveAt(interval->GetStart() + 1, liveness); - } - - // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e. - // [interval->GetStart() + 1, interval->GetEnd() + 1) - auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), - interval->GetUses().end(), - interval->GetStart() + 1u, - interval->GetEnd() + 1u); - for (const UsePosition& use : matching_use_range) { - if (use.GetUser() != nullptr && use.RequiresRegister()) { - // Cost for spilling at a register use point. - use_weight += CostForMoveAt(use.GetUser()->GetLifetimePosition() - 1, liveness); - } - } - - // We divide by the length of the interval because we want to prioritize - // short intervals; we do not benefit much if we split them further. - return static_cast<float>(use_weight) / static_cast<float>(length); -} - -// Interference nodes make up the interference graph, which is the primary data structure in -// graph coloring register allocation. Each node represents a single live interval, and contains -// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory, -// pre-colored nodes never contain outgoing edges (only incoming ones). -// -// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed, -// but outgoing edges remain in order to later color the node based on the colors of its neighbors. -// -// Note that a pair interval is represented by a single node in the interference graph, which -// essentially requires two colors. One consequence of this is that the degree of a node is not -// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum -// number of colors with which a node could interfere. We model this by giving edges different -// weights (1 or 2) to control how much it increases the degree of adjacent nodes. -// For example, the edge between two single nodes will have weight 1. 
On the other hand, -// the edge between a single node and a pair node will have weight 2. This is because the pair -// node could block up to two colors for the single node, and because the single node could -// block an entire two-register aligned slot for the pair node. -// The degree is defined this way because we use it to decide whether a node is guaranteed a color, -// and thus whether it is safe to prune it from the interference graph early on. -class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { - public: - InterferenceNode(LiveInterval* interval, - const SsaLivenessAnalysis& liveness) - : stage(NodeStage::kInitial), - interval_(interval), - adjacent_nodes_(nullptr), - coalesce_opportunities_(nullptr), - out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0), - alias_(this), - spill_weight_(ComputeSpillWeight(interval, liveness)), - requires_color_(interval->RequiresRegister()), - needs_spill_slot_(false) { - DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval"; - } - - void AddInterference(InterferenceNode* other, - bool guaranteed_not_interfering_yet, - ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>>* storage) { - DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences"; - DCHECK_NE(this, other) << "Should not create self loops in the interference graph"; - DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another"; - DCHECK_NE(stage, NodeStage::kPruned); - DCHECK_NE(other->stage, NodeStage::kPruned); - if (adjacent_nodes_ == nullptr) { - ScopedArenaVector<InterferenceNode*>::allocator_type adapter(storage->get_allocator()); - storage->emplace_back(adapter); - adjacent_nodes_ = &storage->back(); - } - if (guaranteed_not_interfering_yet) { - DCHECK(!ContainsElement(GetAdjacentNodes(), other)); - adjacent_nodes_->push_back(other); - out_degree_ += EdgeWeightWith(other); - } else { - if (!ContainsElement(GetAdjacentNodes(), other)) { - adjacent_nodes_->push_back(other); - out_degree_ += EdgeWeightWith(other); - } - } - } - - void RemoveInterference(InterferenceNode* other) { - DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node"; - DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning"; - if (adjacent_nodes_ != nullptr) { - auto it = std::find(adjacent_nodes_->begin(), adjacent_nodes_->end(), other); - if (it != adjacent_nodes_->end()) { - adjacent_nodes_->erase(it); - out_degree_ -= EdgeWeightWith(other); - } - } - } - - bool ContainsInterference(InterferenceNode* other) const { - DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences"; - DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences"; - return ContainsElement(GetAdjacentNodes(), other); - } - - LiveInterval* GetInterval() const { - return interval_; - } - - ArrayRef<InterferenceNode*> GetAdjacentNodes() const { - return adjacent_nodes_ != nullptr - ? ArrayRef<InterferenceNode*>(*adjacent_nodes_) - : ArrayRef<InterferenceNode*>(); - } - - size_t GetOutDegree() const { - // Pre-colored nodes have infinite degree. 
- DCHECK_IMPLIES(IsPrecolored(), out_degree_ == std::numeric_limits<size_t>::max()); - return out_degree_; - } - - void AddCoalesceOpportunity(CoalesceOpportunity* opportunity, - ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>>* storage) { - if (coalesce_opportunities_ == nullptr) { - ScopedArenaVector<CoalesceOpportunity*>::allocator_type adapter(storage->get_allocator()); - storage->emplace_back(adapter); - coalesce_opportunities_ = &storage->back(); - } - coalesce_opportunities_->push_back(opportunity); - } - - void ClearCoalesceOpportunities() { - coalesce_opportunities_ = nullptr; - } - - bool IsMoveRelated() const { - for (CoalesceOpportunity* opportunity : GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kWorklist || - opportunity->stage == CoalesceStage::kActive) { - return true; - } - } - return false; - } - - // Return whether this node already has a color. - // Used to find fixed nodes in the interference graph before coloring. - bool IsPrecolored() const { - return interval_->HasRegister(); - } - - bool IsPair() const { - return interval_->HasHighInterval(); - } - - void SetAlias(InterferenceNode* rep) { - DCHECK_NE(rep->stage, NodeStage::kPruned); - DCHECK_EQ(this, alias_) << "Should only set a node's alias once"; - alias_ = rep; - } - - InterferenceNode* GetAlias() { - if (alias_ != this) { - // Recurse in order to flatten tree of alias pointers. - alias_ = alias_->GetAlias(); - } - return alias_; - } - - ArrayRef<CoalesceOpportunity*> GetCoalesceOpportunities() const { - return coalesce_opportunities_ != nullptr - ? ArrayRef<CoalesceOpportunity*>(*coalesce_opportunities_) - : ArrayRef<CoalesceOpportunity*>(); - } - - float GetSpillWeight() const { - return spill_weight_; - } - - bool RequiresColor() const { - return requires_color_; - } - - // We give extra weight to edges adjacent to pair nodes. See the general comment on the - // interference graph above. - size_t EdgeWeightWith(const InterferenceNode* other) const { - return (IsPair() || other->IsPair()) ? 2 : 1; - } - - bool NeedsSpillSlot() const { - return needs_spill_slot_; - } - - void SetNeedsSpillSlot() { - needs_spill_slot_ = true; - } - - // The current stage of this node, indicating which worklist it belongs to. - NodeStage stage; - - private: - // The live interval that this node represents. - LiveInterval* const interval_; - - // All nodes interfering with this one. - // We use an unsorted vector as a set, since a tree or hash set is too heavy for the - // set sizes that we encounter. Using a vector leads to much better performance. - ScopedArenaVector<InterferenceNode*>* adjacent_nodes_; // Owned by ColoringIteration. - - // Interference nodes that this node should be coalesced with to reduce moves. - ScopedArenaVector<CoalesceOpportunity*>* coalesce_opportunities_; // Owned by ColoringIteration. - - // The maximum number of colors with which this node could interfere. This could be more than - // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes. - // We use "out" degree because incoming edges come from nodes already pruned from the graph, - // and do not affect the coloring of this node. - // Pre-colored nodes are treated as having infinite degree. - size_t out_degree_; - - // The node representing this node in the interference graph. - // Initially set to `this`, and only changed if this node is coalesced into another. - InterferenceNode* alias_; - - // The cost of splitting and spilling this interval to the stack. 
- // Nodes with a higher spill weight should be prioritized when assigning registers. - // This is essentially based on use density and location; short intervals with many uses inside - // deeply nested loops have a high spill weight. - const float spill_weight_; - - const bool requires_color_; - - bool needs_spill_slot_; - - DISALLOW_COPY_AND_ASSIGN(InterferenceNode); -}; - -// The order in which we color nodes is important. To guarantee forward progress, -// we prioritize intervals that require registers, and after that we prioritize -// short intervals. That way, if we fail to color a node, it either won't require a -// register, or it will be a long interval that can be split in order to make the -// interference graph sparser. -// To improve code quality, we prioritize intervals used frequently in deeply nested loops. -// (This metric is secondary to the forward progress requirements above.) -// TODO: May also want to consider: -// - Constants (since they can be rematerialized) -// - Allocated spill slots -static bool HasGreaterNodePriority(const InterferenceNode* lhs, - const InterferenceNode* rhs) { - // (1) Prioritize the node that requires a color. - if (lhs->RequiresColor() != rhs->RequiresColor()) { - return lhs->RequiresColor(); - } - - // (2) Prioritize the interval that has a higher spill weight. - return lhs->GetSpillWeight() > rhs->GetSpillWeight(); -} - -// A ColoringIteration holds the many data structures needed for a single graph coloring attempt, -// and provides methods for each phase of the attempt. -class ColoringIteration { - public: - ColoringIteration(RegisterAllocatorGraphColor* register_allocator, - ScopedArenaAllocator* allocator, - bool processing_core_regs, - size_t num_regs) - : register_allocator_(register_allocator), - allocator_(allocator), - processing_core_regs_(processing_core_regs), - num_regs_(num_regs), - interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)), - prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), - freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), - spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)), - coalesce_worklist_(CoalesceOpportunity::CmpPriority, - allocator->Adapter(kArenaAllocRegisterAllocator)), - adjacent_nodes_links_(allocator->Adapter(kArenaAllocRegisterAllocator)), - coalesce_opportunities_links_(allocator->Adapter(kArenaAllocRegisterAllocator)) {} - - // Use the intervals collected from instructions to construct an - // interference graph mapping intervals to adjacency lists. - // Also, collect synthesized safepoint nodes, used to keep - // track of live intervals across safepoints. - // TODO: Should build safepoints elsewhere. - void BuildInterferenceGraph(const ScopedArenaVector<LiveInterval*>& intervals, - const ScopedArenaVector<InterferenceNode*>& physical_nodes); - - // Add coalesce opportunities to interference nodes. - void FindCoalesceOpportunities(); - - // Prune nodes from the interference graph to be colored later. Build - // a stack (pruned_nodes) containing these intervals in an order determined - // by various heuristics. - void PruneInterferenceGraph(); - - // Process pruned_intervals_ to color the interference graph, spilling when - // necessary. Returns true if successful. 
Else, some intervals have been - // split, and the interference graph should be rebuilt for another attempt. - bool ColorInterferenceGraph(); - - // Return prunable nodes. - // The register allocator will need to access prunable nodes after coloring - // in order to tell the code generator which registers have been assigned. - ArrayRef<InterferenceNode* const> GetPrunableNodes() const { - return ArrayRef<InterferenceNode* const>(prunable_nodes_); - } - - private: - // Create a coalesce opportunity between two nodes. - void CreateCoalesceOpportunity(InterferenceNode* a, - InterferenceNode* b, - CoalesceKind kind, - size_t position); - - // Add an edge in the interference graph, if valid. - // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion - // when possible. - void AddPotentialInterference(InterferenceNode* from, - InterferenceNode* to, - bool guaranteed_not_interfering_yet, - bool both_directions = true); - - // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors) - // may be pruned from the interference graph. - void FreezeMoves(InterferenceNode* node); - - // Prune a node from the interference graph, updating worklists if necessary. - void PruneNode(InterferenceNode* node); - - // Add coalesce opportunities associated with this node to the coalesce worklist. - void EnableCoalesceOpportunities(InterferenceNode* node); - - // If needed, from `node` from the freeze worklist to the simplify worklist. - void CheckTransitionFromFreezeWorklist(InterferenceNode* node); - - // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively. - bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into); - - // Return true if `from` and `into` are uncolored, and can be coalesced conservatively. - bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into); - - void Coalesce(CoalesceOpportunity* opportunity); - - // Merge `from` into `into` in the interference graph. - void Combine(InterferenceNode* from, InterferenceNode* into); - - // A reference to the register allocator instance, - // needed to split intervals and assign spill slots. - RegisterAllocatorGraphColor* register_allocator_; - - // A scoped arena allocator used for a single graph coloring attempt. - ScopedArenaAllocator* allocator_; - - const bool processing_core_regs_; - - const size_t num_regs_; - - // A map from live intervals to interference nodes. - ScopedArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_; - - // Uncolored nodes that should be pruned from the interference graph. - ScopedArenaVector<InterferenceNode*> prunable_nodes_; - - // A stack of nodes pruned from the interference graph, waiting to be pruned. - ScopedArenaStdStack<InterferenceNode*> pruned_nodes_; - - // A queue containing low degree, non-move-related nodes that can pruned immediately. - ScopedArenaDeque<InterferenceNode*> simplify_worklist_; - - // A queue containing low degree, move-related nodes. - ScopedArenaDeque<InterferenceNode*> freeze_worklist_; - - // A queue containing high degree nodes. - // If we have to prune from the spill worklist, we cannot guarantee - // the pruned node a color, so we order the worklist by priority. - ScopedArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_; - - // A queue containing coalesce opportunities. 
- // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those - // inside of loops) are more important than others. - ScopedArenaPriorityQueue<CoalesceOpportunity*, - decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_; - - // Storage for links to adjacent nodes for interference nodes. - // Using std::deque so that elements do not move when adding new ones. - ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>> adjacent_nodes_links_; - - // Storage for links to coalesce opportunities for interference nodes. - // Using std::deque so that elements do not move when adding new ones. - ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>> coalesce_opportunities_links_; - - DISALLOW_COPY_AND_ASSIGN(ColoringIteration); -}; - -static bool IsCoreInterval(LiveInterval* interval) { - return !DataType::IsFloatingPointType(interval->GetType()); -} - -static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) { - return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize; -} - -RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator, - CodeGenerator* codegen, - const SsaLivenessAnalysis& liveness, - bool iterative_move_coalescing) - : RegisterAllocator(allocator, codegen, liveness), - iterative_move_coalescing_(iterative_move_coalescing), - core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - num_int_spill_slots_(0), - num_double_spill_slots_(0), - num_float_spill_slots_(0), - num_long_spill_slots_(0), - catch_phi_spill_slot_counter_(0), - reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)), - reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) { - // Before we ask for blocked registers, set them up in the code generator. - codegen->SetupBlockedRegisters(); - - // Initialize physical core register live intervals and blocked registers. - // This includes globally blocked registers, such as the stack pointer. - physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr); - for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { - LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kInt32); - physical_core_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness); - physical_core_nodes_[i]->stage = NodeStage::kPrecolored; - core_intervals_.push_back(interval); - if (codegen_->IsBlockedCoreRegister(i)) { - interval->AddRange(0, liveness.GetMaxLifetimePosition()); - } - } - // Initialize physical floating point register live intervals and blocked registers. 
- physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr); - for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { - LiveInterval* interval = - LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kFloat32); - physical_fp_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness); - physical_fp_nodes_[i]->stage = NodeStage::kPrecolored; - fp_intervals_.push_back(interval); - if (codegen_->IsBlockedFloatingPointRegister(i)) { - interval->AddRange(0, liveness.GetMaxLifetimePosition()); - } - } -} - -RegisterAllocatorGraphColor::~RegisterAllocatorGraphColor() {} - -void RegisterAllocatorGraphColor::AllocateRegisters() { - // (1) Collect and prepare live intervals. - ProcessInstructions(); - - for (bool processing_core_regs : {true, false}) { - ScopedArenaVector<LiveInterval*>& intervals = processing_core_regs - ? core_intervals_ - : fp_intervals_; - size_t num_registers = processing_core_regs - ? codegen_->GetNumberOfCoreRegisters() - : codegen_->GetNumberOfFloatingPointRegisters(); - - size_t attempt = 0; - while (true) { - ++attempt; - DCHECK(attempt <= kMaxGraphColoringAttemptsDebug) - << "Exceeded debug max graph coloring register allocation attempts. " - << "This could indicate that the register allocator is not making forward progress, " - << "which could be caused by prioritizing the wrong live intervals. (Short intervals " - << "should be prioritized over long ones, because they cannot be split further.)"; - - // Many data structures are cleared between graph coloring attempts, so we reduce - // total memory usage by using a new scoped arena allocator for each attempt. - ScopedArenaAllocator coloring_attempt_allocator(allocator_->GetArenaStack()); - ColoringIteration iteration(this, - &coloring_attempt_allocator, - processing_core_regs, - num_registers); - - // (2) Build the interference graph. - ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs - ? physical_core_nodes_ - : physical_fp_nodes_; - iteration.BuildInterferenceGraph(intervals, physical_nodes); - - // (3) Add coalesce opportunities. - // If we have tried coloring the graph a suspiciously high number of times, give - // up on move coalescing, just in case the coalescing heuristics are not conservative. - // (This situation will be caught if DCHECKs are turned on.) - if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) { - iteration.FindCoalesceOpportunities(); - } - - // (4) Prune all uncolored nodes from interference graph. - iteration.PruneInterferenceGraph(); - - // (5) Color pruned nodes based on interferences. - bool successful = iteration.ColorInterferenceGraph(); - - // We manually clear coalesce opportunities for physical nodes, - // since they persist across coloring attempts. - for (InterferenceNode* node : physical_core_nodes_) { - node->ClearCoalesceOpportunities(); - } - for (InterferenceNode* node : physical_fp_nodes_) { - node->ClearCoalesceOpportunities(); - } - - if (successful) { - // Assign spill slots. - AllocateSpillSlots(iteration.GetPrunableNodes()); - - // Tell the code generator which registers were allocated. - // We only look at prunable_nodes because we already told the code generator about - // fixed intervals while processing instructions. We also ignore the fixed intervals - // placed at the top of catch blocks. 
- for (InterferenceNode* node : iteration.GetPrunableNodes()) { - LiveInterval* interval = node->GetInterval(); - if (interval->HasRegister()) { - Location low_reg = processing_core_regs - ? Location::RegisterLocation(interval->GetRegister()) - : Location::FpuRegisterLocation(interval->GetRegister()); - codegen_->AddAllocatedRegister(low_reg); - if (interval->HasHighInterval()) { - LiveInterval* high = interval->GetHighInterval(); - DCHECK(high->HasRegister()); - Location high_reg = processing_core_regs - ? Location::RegisterLocation(high->GetRegister()) - : Location::FpuRegisterLocation(high->GetRegister()); - codegen_->AddAllocatedRegister(high_reg); - } - } else { - DCHECK_IMPLIES(interval->HasHighInterval(), - !interval->GetHighInterval()->HasRegister()); - } - } - - break; - } - } // while unsuccessful - } // for processing_core_instructions - - // (6) Resolve locations and deconstruct SSA form. - RegisterAllocationResolver(codegen_, liveness_) - .Resolve(ArrayRef<HInstruction* const>(safepoints_), - reserved_art_method_slots_ + reserved_out_slots_, - num_int_spill_slots_, - num_long_spill_slots_, - num_float_spill_slots_, - num_double_spill_slots_, - catch_phi_spill_slot_counter_, - ArrayRef<LiveInterval* const>(temp_intervals_)); - - if (kIsDebugBuild) { - Validate(/*log_fatal_on_failure*/ true); - } -} - -bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) { - for (bool processing_core_regs : {true, false}) { - ScopedArenaAllocator allocator(allocator_->GetArenaStack()); - ScopedArenaVector<LiveInterval*> intervals( - allocator.Adapter(kArenaAllocRegisterAllocatorValidate)); - for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { - HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); - LiveInterval* interval = instruction->GetLiveInterval(); - if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) { - intervals.push_back(instruction->GetLiveInterval()); - } - } - - ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs - ? physical_core_nodes_ - : physical_fp_nodes_; - for (InterferenceNode* fixed : physical_nodes) { - LiveInterval* interval = fixed->GetInterval(); - if (interval->GetFirstRange() != nullptr) { - // Ideally we would check fixed ranges as well, but currently there are times when - // two fixed intervals for the same register will overlap. For example, a fixed input - // and a fixed output may sometimes share the same register, in which there will be two - // fixed intervals for the same place. - } - } - - for (LiveInterval* temp : temp_intervals_) { - if (IsCoreInterval(temp) == processing_core_regs) { - intervals.push_back(temp); - } - } - - size_t spill_slots = num_int_spill_slots_ - + num_long_spill_slots_ - + num_float_spill_slots_ - + num_double_spill_slots_ - + catch_phi_spill_slot_counter_; - bool ok = ValidateIntervals(ArrayRef<LiveInterval* const>(intervals), - spill_slots, - reserved_art_method_slots_ + reserved_out_slots_, - *codegen_, - processing_core_regs, - log_fatal_on_failure); - if (!ok) { - return false; - } - } // for processing_core_regs - - return true; -} - -void RegisterAllocatorGraphColor::ProcessInstructions() { - for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) { - // Note that we currently depend on this ordering, since some helper - // code is designed for linear scan register allocation. 
- for (HBackwardInstructionIterator instr_it(block->GetInstructions()); - !instr_it.Done(); - instr_it.Advance()) { - ProcessInstruction(instr_it.Current()); - } - - for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - ProcessInstruction(phi_it.Current()); - } - - if (block->IsCatchBlock() - || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { - // By blocking all registers at the top of each catch block or irreducible loop, we force - // intervals belonging to the live-in set of the catch/header block to be spilled. - // TODO(ngeoffray): Phis in this block could be allocated in register. - size_t position = block->GetLifetimeStart(); - BlockRegisters(position, position + 1); - } - } -} - -bool RegisterAllocatorGraphColor::TryRemoveSuspendCheckEntry(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - if (instruction->IsSuspendCheckEntry() && !codegen_->NeedsSuspendCheckEntry()) { - // TODO: We do this here because we do not want the suspend check to artificially - // create live registers. We should find another place, but this is currently the - // simplest. - DCHECK_EQ(locations->GetTempCount(), 0u); - instruction->GetBlock()->RemoveInstruction(instruction); - return true; - } - return false; -} - -void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - if (locations == nullptr) { - return; - } - if (TryRemoveSuspendCheckEntry(instruction)) { - return; - } - - CheckForTempLiveIntervals(instruction); - CheckForSafepoint(instruction); - if (locations->WillCall()) { - // If a call will happen, create fixed intervals for caller-save registers. - // TODO: Note that it may be beneficial to later split intervals at this point, - // so that we allow last-minute moves from a caller-save register - // to a callee-save register. - BlockRegisters(instruction->GetLifetimePosition(), - instruction->GetLifetimePosition() + 1, - /*caller_save_only*/ true); - } - CheckForFixedInputs(instruction); - - LiveInterval* interval = instruction->GetLiveInterval(); - if (interval == nullptr) { - // Instructions lacking a valid output location do not have a live interval. - DCHECK(!locations->Out().IsValid()); - return; - } - - // Low intervals act as representatives for their corresponding high interval. - DCHECK(!interval->IsHighInterval()); - if (codegen_->NeedsTwoRegisters(interval->GetType())) { - interval->AddHighInterval(); - } - AddSafepointsFor(instruction); - CheckForFixedOutput(instruction); - AllocateSpillSlotForCatchPhi(instruction); - - ScopedArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval) - ? core_intervals_ - : fp_intervals_; - if (interval->HasSpillSlot() || instruction->IsConstant()) { - // Note that if an interval already has a spill slot, then its value currently resides - // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first - // register use. This is also true for constants, which can be materialized at any point. - size_t first_register_use = interval->FirstRegisterUse(); - if (first_register_use != kNoLifetime) { - LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1); - intervals.push_back(split); - } else { - // We won't allocate a register for this value. 
- } - } else { - intervals.push_back(interval); - } -} - -void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) { - // We simply block physical registers where necessary. - // TODO: Ideally we would coalesce the physical register with the register - // allocated to the input value, but this can be tricky if, e.g., there - // could be multiple physical register uses of the same value at the - // same instruction. Furthermore, there's currently no distinction between - // fixed inputs to a call (which will be clobbered) and other fixed inputs (which - // may not be clobbered). - LocationSummary* locations = instruction->GetLocations(); - size_t position = instruction->GetLifetimePosition(); - for (size_t i = 0; i < locations->GetInputCount(); ++i) { - Location input = locations->InAt(i); - if (input.IsRegister() || input.IsFpuRegister()) { - BlockRegister(input, position, position + 1); - codegen_->AddAllocatedRegister(input); - } else if (input.IsPair()) { - BlockRegister(input.ToLow(), position, position + 1); - BlockRegister(input.ToHigh(), position, position + 1); - codegen_->AddAllocatedRegister(input.ToLow()); - codegen_->AddAllocatedRegister(input.ToHigh()); - } - } -} - -void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) { - // If an instruction has a fixed output location, we give the live interval a register and then - // proactively split it just after the definition point to avoid creating too many interferences - // with a fixed node. - LiveInterval* interval = instruction->GetLiveInterval(); - Location out = interval->GetDefinedBy()->GetLocations()->Out(); - size_t position = instruction->GetLifetimePosition(); - DCHECK_GE(interval->GetEnd() - position, 2u); - - if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { - out = instruction->GetLocations()->InAt(0); - } - - if (out.IsRegister() || out.IsFpuRegister()) { - interval->SetRegister(out.reg()); - codegen_->AddAllocatedRegister(out); - Split(interval, position + 1); - } else if (out.IsPair()) { - interval->SetRegister(out.low()); - interval->GetHighInterval()->SetRegister(out.high()); - codegen_->AddAllocatedRegister(out.ToLow()); - codegen_->AddAllocatedRegister(out.ToHigh()); - Split(interval, position + 1); - } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) { - interval->SetSpillSlot(out.GetStackIndex()); - } else { - DCHECK(out.IsUnallocated() || out.IsConstant()); - } -} - -void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) { - LiveInterval* interval = instruction->GetLiveInterval(); - for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) { - HInstruction* safepoint = safepoints_[safepoint_index - 1u]; - size_t safepoint_position = safepoint->GetLifetimePosition(); - - // Test that safepoints_ are ordered in the optimal way. - DCHECK(safepoint_index == safepoints_.size() || - safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position); - - if (safepoint_position == interval->GetStart()) { - // The safepoint is for this instruction, so the location of the instruction - // does not need to be saved. - DCHECK_EQ(safepoint_index, safepoints_.size()); - DCHECK_EQ(safepoint, instruction); - continue; - } else if (interval->IsDeadAt(safepoint_position)) { - break; - } else if (!interval->Covers(safepoint_position)) { - // Hole in the interval. 
- continue; - } - interval->AddSafepoint(safepoint); - } -} - -void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - size_t position = instruction->GetLifetimePosition(); - for (size_t i = 0; i < locations->GetTempCount(); ++i) { - Location temp = locations->GetTemp(i); - if (temp.IsRegister() || temp.IsFpuRegister()) { - BlockRegister(temp, position, position + 1); - codegen_->AddAllocatedRegister(temp); - } else { - DCHECK(temp.IsUnallocated()); - switch (temp.GetPolicy()) { - case Location::kRequiresRegister: { - LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, DataType::Type::kInt32); - interval->AddTempUse(instruction, i); - core_intervals_.push_back(interval); - temp_intervals_.push_back(interval); - break; - } - - case Location::kRequiresFpuRegister: { - LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, DataType::Type::kFloat64); - interval->AddTempUse(instruction, i); - fp_intervals_.push_back(interval); - temp_intervals_.push_back(interval); - if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) { - interval->AddHighInterval(/*is_temp*/ true); - temp_intervals_.push_back(interval->GetHighInterval()); - } - break; - } - - default: - LOG(FATAL) << "Unexpected policy for temporary location " - << temp.GetPolicy(); - } - } - } -} - -void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - - if (locations->NeedsSafepoint()) { - safepoints_.push_back(instruction); - } -} - -LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) { - if (interval->GetStart() < position && position < interval->GetEnd()) { - return Split(interval, position); - } else { - return interval; - } -} - -void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) { - DCHECK(!interval->IsHighInterval()); - - // Split just after a register definition. - if (interval->IsParent() && interval->DefinitionRequiresRegister()) { - interval = TrySplit(interval, interval->GetStart() + 1); - } - - // Process uses in the range [interval->GetStart(), interval->GetEnd()], i.e. - // [interval->GetStart(), interval->GetEnd() + 1) - auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), - interval->GetUses().end(), - interval->GetStart(), - interval->GetEnd() + 1u); - // Split around register uses. - for (const UsePosition& use : matching_use_range) { - if (use.RequiresRegister()) { - size_t position = use.GetPosition(); - interval = TrySplit(interval, position - 1); - if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) { - // If we are at the very end of a basic block, we cannot split right - // at the use. Split just after instead. 
- interval = TrySplit(interval, position + 1); - } else { - interval = TrySplit(interval, position); - } - } - } -} - -void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) { - if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) { - HPhi* phi = instruction->AsPhi(); - LiveInterval* interval = phi->GetLiveInterval(); - - HInstruction* previous_phi = phi->GetPrevious(); - DCHECK(previous_phi == nullptr || - previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) - << "Phis expected to be sorted by vreg number, " - << "so that equivalent phis are adjacent."; - - if (phi->IsVRegEquivalentOf(previous_phi)) { - // Assign the same spill slot. - DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot()); - interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot()); - } else { - interval->SetSpillSlot(catch_phi_spill_slot_counter_); - catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded(); - } - } -} - -void RegisterAllocatorGraphColor::BlockRegister(Location location, - size_t start, - size_t end) { - DCHECK(location.IsRegister() || location.IsFpuRegister()); - int reg = location.reg(); - LiveInterval* interval = location.IsRegister() - ? physical_core_nodes_[reg]->GetInterval() - : physical_fp_nodes_[reg]->GetInterval(); - DCHECK(interval->GetRegister() == reg); - bool blocked_by_codegen = location.IsRegister() - ? codegen_->IsBlockedCoreRegister(reg) - : codegen_->IsBlockedFloatingPointRegister(reg); - if (blocked_by_codegen) { - // We've already blocked this register for the entire method. (And adding a - // range inside another range violates the preconditions of AddRange). - } else { - interval->AddRange(start, end); - } -} - -void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) { - for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { - if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) { - BlockRegister(Location::RegisterLocation(i), start, end); - } - } - for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { - if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) { - BlockRegister(Location::FpuRegisterLocation(i), start, end); - } - } -} - -void ColoringIteration::AddPotentialInterference(InterferenceNode* from, - InterferenceNode* to, - bool guaranteed_not_interfering_yet, - bool both_directions) { - if (from->IsPrecolored()) { - // We save space by ignoring outgoing edges from fixed nodes. - } else if (to->IsPrecolored()) { - // It is important that only a single node represents a given fixed register in the - // interference graph. We retrieve that node here. - const ScopedArenaVector<InterferenceNode*>& physical_nodes = - to->GetInterval()->IsFloatingPoint() ? register_allocator_->physical_fp_nodes_ - : register_allocator_->physical_core_nodes_; - InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()]; - from->AddInterference( - physical_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_); - DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister()); - DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node"; - - // If a node interferes with a fixed pair node, the weight of the edge may - // be inaccurate after using the alias of the pair node, because the alias of the pair node - // is a singular node. 
- // We could make special pair fixed nodes, but that ends up being too conservative because - // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of - // three rather than two. - // Instead, we explicitly add an interference with the high node of the fixed pair node. - // TODO: This is too conservative at time for pair nodes, but the fact that fixed pair intervals - // can be unaligned on x86 complicates things. - if (to->IsPair()) { - InterferenceNode* high_node = - physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()]; - DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(), - high_node->GetInterval()->GetRegister()); - from->AddInterference( - high_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_); - } - } else { - // Standard interference between two uncolored nodes. - from->AddInterference(to, guaranteed_not_interfering_yet, &adjacent_nodes_links_); - } - - if (both_directions) { - AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false); - } -} - -// Returns true if `in_node` represents an input interval of `out_node`, and the output interval -// is allowed to have the same register as the input interval. -// TODO: Ideally we should just produce correct intervals in liveness analysis. -// We would need to refactor the current live interval layout to do so, which is -// no small task. -static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) { - LiveInterval* output_interval = out_node->GetInterval(); - HInstruction* defined_by = output_interval->GetDefinedBy(); - if (defined_by == nullptr) { - // This must not be a definition point. - return false; - } - - LocationSummary* locations = defined_by->GetLocations(); - if (locations->OutputCanOverlapWithInputs()) { - // This instruction does not allow the output to reuse a register from an input. - return false; - } - - LiveInterval* input_interval = in_node->GetInterval(); - LiveInterval* next_sibling = input_interval->GetNextSibling(); - size_t def_position = defined_by->GetLifetimePosition(); - size_t use_position = def_position + 1; - if (next_sibling != nullptr && next_sibling->GetStart() == use_position) { - // The next sibling starts at the use position, so reusing the input register in the output - // would clobber the input before it's moved into the sibling interval location. - return false; - } - - if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) { - // The input interval is live after the use position. - return false; - } - - HInputsRef inputs = defined_by->GetInputs(); - for (size_t i = 0; i < inputs.size(); ++i) { - if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) { - DCHECK(input_interval->SameRegisterKind(*output_interval)); - return true; - } - } - - // The input interval was not an input for this instruction. - return false; -} - -void ColoringIteration::BuildInterferenceGraph( - const ScopedArenaVector<LiveInterval*>& intervals, - const ScopedArenaVector<InterferenceNode*>& physical_nodes) { - DCHECK(interval_node_map_.empty() && prunable_nodes_.empty()); - // Build the interference graph efficiently by ordering range endpoints - // by position and doing a linear sweep to find interferences. (That is, we - // jump from endpoint to endpoint, maintaining a set of intervals live at each - // point. If two nodes are ever in the live set at the same time, then they - // interfere with each other.) 
- // - // We order by both position and (secondarily) by whether the endpoint - // begins or ends a range; we want to process range endings before range - // beginnings at the same position because they should not conflict. - // - // For simplicity, we create a tuple for each endpoint, and then sort the tuples. - // Tuple contents: (position, is_range_beginning, node). - ScopedArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints( - allocator_->Adapter(kArenaAllocRegisterAllocator)); - - // We reserve plenty of space to avoid excessive copying. - range_endpoints.reserve(4 * prunable_nodes_.size()); - - for (LiveInterval* parent : intervals) { - for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) { - LiveRange* range = sibling->GetFirstRange(); - if (range != nullptr) { - InterferenceNode* node = - new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_); - interval_node_map_.insert(std::make_pair(sibling, node)); - - if (sibling->HasRegister()) { - // Fixed nodes should alias the canonical node for the corresponding register. - node->stage = NodeStage::kPrecolored; - InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()]; - node->SetAlias(physical_node); - DCHECK_EQ(node->GetInterval()->GetRegister(), - physical_node->GetInterval()->GetRegister()); - } else { - node->stage = NodeStage::kPrunable; - prunable_nodes_.push_back(node); - } - - while (range != nullptr) { - range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node)); - range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node)); - range = range->GetNext(); - } - } - } - } - - // Sort the endpoints. - // We explicitly ignore the third entry of each tuple (the node pointer) in order - // to maintain determinism. - std::sort(range_endpoints.begin(), range_endpoints.end(), - [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs, - const std::tuple<size_t, bool, InterferenceNode*>& rhs) { - return std::tie(std::get<0>(lhs), std::get<1>(lhs)) - < std::tie(std::get<0>(rhs), std::get<1>(rhs)); - }); - - // Nodes live at the current position in the linear sweep. - ScopedArenaVector<InterferenceNode*> live(allocator_->Adapter(kArenaAllocRegisterAllocator)); - - // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the - // live set. When we encounter the end of a range, we remove the corresponding node - // from the live set. Nodes interfere if they are in the live set at the same time. - for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) { - bool is_range_beginning; - InterferenceNode* node; - size_t position; - // Extract information from the tuple, including the node this tuple represents. - std::tie(position, is_range_beginning, node) = *it; - - if (is_range_beginning) { - bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart(); - for (InterferenceNode* conflicting : live) { - DCHECK_NE(node, conflicting); - if (CheckInputOutputCanOverlap(conflicting, node)) { - // We do not add an interference, because the instruction represented by `node` allows - // its output to share a register with an input, represented here by `conflicting`. - } else { - AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet); - } - } - DCHECK(std::find(live.begin(), live.end(), node) == live.end()); - live.push_back(node); - } else { - // End of range. 
- auto live_it = std::find(live.begin(), live.end(), node); - DCHECK(live_it != live.end()); - live.erase(live_it); - } - } - DCHECK(live.empty()); -} - -void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a, - InterferenceNode* b, - CoalesceKind kind, - size_t position) { - DCHECK_EQ(a->IsPair(), b->IsPair()) - << "Nodes of different memory widths should never be coalesced"; - CoalesceOpportunity* opportunity = - new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_); - a->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); - b->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); - coalesce_worklist_.push(opportunity); -} - -// When looking for coalesce opportunities, we use the interval_node_map_ to find the node -// corresponding to an interval. Note that not all intervals are in this map, notably the parents -// of constants and stack arguments. (However, these interval should not be involved in coalesce -// opportunities anyway, because they're not going to be in registers.) -void ColoringIteration::FindCoalesceOpportunities() { - DCHECK(coalesce_worklist_.empty()); - - for (InterferenceNode* node : prunable_nodes_) { - LiveInterval* interval = node->GetInterval(); - - // Coalesce siblings. - LiveInterval* next_sibling = interval->GetNextSibling(); - if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) { - auto it = interval_node_map_.find(next_sibling); - if (it != interval_node_map_.end()) { - InterferenceNode* sibling_node = it->second; - CreateCoalesceOpportunity(node, - sibling_node, - CoalesceKind::kAdjacentSibling, - interval->GetEnd()); - } - } - - // Coalesce fixed outputs with this interval if this interval is an adjacent sibling. - LiveInterval* parent = interval->GetParent(); - if (parent->HasRegister() - && parent->GetNextSibling() == interval - && parent->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.find(parent); - if (it != interval_node_map_.end()) { - InterferenceNode* parent_node = it->second; - CreateCoalesceOpportunity(node, - parent_node, - CoalesceKind::kFixedOutputSibling, - parent->GetEnd()); - } - } - - // Try to prevent moves across blocks. - // Note that this does not lead to many succeeding coalesce attempts, so could be removed - // if found to add to compile time. - const SsaLivenessAnalysis& liveness = register_allocator_->liveness_; - if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) { - // If the start of this interval is at a block boundary, we look at the - // location of the interval in blocks preceding the block this interval - // starts at. This can avoid a move between the two blocks. - HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2); - for (HBasicBlock* predecessor : block->GetPredecessors()) { - size_t position = predecessor->GetLifetimeEnd() - 1; - LiveInterval* existing = interval->GetParent()->GetSiblingAt(position); - if (existing != nullptr) { - auto it = interval_node_map_.find(existing); - if (it != interval_node_map_.end()) { - InterferenceNode* existing_node = it->second; - CreateCoalesceOpportunity(node, - existing_node, - CoalesceKind::kNonlinearControlFlow, - position); - } - } - } - } - - // Coalesce phi inputs with the corresponding output. 
- HInstruction* defined_by = interval->GetDefinedBy(); - if (defined_by != nullptr && defined_by->IsPhi()) { - ArrayRef<HBasicBlock* const> predecessors(defined_by->GetBlock()->GetPredecessors()); - HInputsRef inputs = defined_by->GetInputs(); - - for (size_t i = 0, e = inputs.size(); i < e; ++i) { - // We want the sibling at the end of the appropriate predecessor block. - size_t position = predecessors[i]->GetLifetimeEnd() - 1; - LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position); - - auto it = interval_node_map_.find(input_interval); - if (it != interval_node_map_.end()) { - InterferenceNode* input_node = it->second; - CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position); - } - } - } - - // Coalesce output with first input when policy is kSameAsFirstInput. - if (defined_by != nullptr) { - Location out = defined_by->GetLocations()->Out(); - if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { - LiveInterval* input_interval - = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1); - // TODO: Could we consider lifetime holes here? - if (input_interval->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.find(input_interval); - if (it != interval_node_map_.end()) { - InterferenceNode* input_node = it->second; - CreateCoalesceOpportunity(node, - input_node, - CoalesceKind::kFirstInput, - interval->GetStart()); - } - } - } - } - - // An interval that starts an instruction (that is, it is not split), may - // re-use the registers used by the inputs of that instruction, based on the - // location summary. - if (defined_by != nullptr) { - DCHECK(!interval->IsSplit()); - LocationSummary* locations = defined_by->GetLocations(); - if (!locations->OutputCanOverlapWithInputs()) { - HInputsRef inputs = defined_by->GetInputs(); - for (size_t i = 0; i < inputs.size(); ++i) { - size_t def_point = defined_by->GetLifetimePosition(); - // TODO: Getting the sibling at the def_point might not be quite what we want - // for fixed inputs, since the use will be *at* the def_point rather than after. - LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point); - if (input_interval != nullptr && - input_interval->HasHighInterval() == interval->HasHighInterval()) { - auto it = interval_node_map_.find(input_interval); - if (it != interval_node_map_.end()) { - InterferenceNode* input_node = it->second; - CreateCoalesceOpportunity(node, - input_node, - CoalesceKind::kAnyInput, - interval->GetStart()); - } - } - } - } - } - - // Try to prevent moves into fixed input locations. - // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e. - // [interval->GetStart() + 1, interval->GetEnd() + 1) - auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), - interval->GetUses().end(), - interval->GetStart() + 1u, - interval->GetEnd() + 1u); - for (const UsePosition& use : matching_use_range) { - HInstruction* user = use.GetUser(); - if (user == nullptr) { - // User may be null for certain intervals, such as temp intervals. - continue; - } - LocationSummary* locations = user->GetLocations(); - Location input = locations->InAt(use.GetInputIndex()); - if (input.IsRegister() || input.IsFpuRegister()) { - // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes - // is currently not supported. - InterferenceNode* fixed_node = input.IsRegister() - ? 
register_allocator_->physical_core_nodes_[input.reg()] - : register_allocator_->physical_fp_nodes_[input.reg()]; - CreateCoalesceOpportunity(node, - fixed_node, - CoalesceKind::kFixedInput, - user->GetLifetimePosition()); - } - } - } // for node in prunable_nodes -} - -static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) { - return node->GetOutDegree() < num_regs; -} - -static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) { - return !IsLowDegreeNode(node, num_regs); -} - -void ColoringIteration::PruneInterferenceGraph() { - DCHECK(pruned_nodes_.empty() - && simplify_worklist_.empty() - && freeze_worklist_.empty() - && spill_worklist_.empty()); - // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes, - // and all others as high degree nodes. The distinction is important: low degree nodes are - // guaranteed a color, while high degree nodes are not. - - // Build worklists. Note that the coalesce worklist has already been - // filled by FindCoalesceOpportunities(). - for (InterferenceNode* node : prunable_nodes_) { - DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned"; - if (IsLowDegreeNode(node, num_regs_)) { - if (node->GetCoalesceOpportunities().empty()) { - // Simplify Worklist. - node->stage = NodeStage::kSimplifyWorklist; - simplify_worklist_.push_back(node); - } else { - // Freeze Worklist. - node->stage = NodeStage::kFreezeWorklist; - freeze_worklist_.push_back(node); - } - } else { - // Spill worklist. - node->stage = NodeStage::kSpillWorklist; - spill_worklist_.push(node); - } - } - - // Prune graph. - // Note that we do not remove a node from its current worklist if it moves to another, so it may - // be in multiple worklists at once; the node's `phase` says which worklist it is really in. - while (true) { - if (!simplify_worklist_.empty()) { - // Prune low-degree nodes. - // TODO: pop_back() should work as well, but it didn't; we get a - // failed check while pruning. We should look into this. - InterferenceNode* node = simplify_worklist_.front(); - simplify_worklist_.pop_front(); - DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list"; - DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree"; - DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related"; - PruneNode(node); - } else if (!coalesce_worklist_.empty()) { - // Coalesce. - CoalesceOpportunity* opportunity = coalesce_worklist_.top(); - coalesce_worklist_.pop(); - if (opportunity->stage == CoalesceStage::kWorklist) { - Coalesce(opportunity); - } - } else if (!freeze_worklist_.empty()) { - // Freeze moves and prune a low-degree move-related node. - InterferenceNode* node = freeze_worklist_.front(); - freeze_worklist_.pop_front(); - if (node->stage == NodeStage::kFreezeWorklist) { - DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree"; - DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related"; - FreezeMoves(node); - PruneNode(node); - } - } else if (!spill_worklist_.empty()) { - // We spill the lowest-priority node, because pruning a node earlier - // gives it a higher chance of being spilled. 
- InterferenceNode* node = spill_worklist_.top(); - spill_worklist_.pop(); - if (node->stage == NodeStage::kSpillWorklist) { - DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree"; - FreezeMoves(node); - PruneNode(node); - } - } else { - // Pruning complete. - break; - } - } - DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size()); -} - -void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) { - for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kActive) { - opportunity->stage = CoalesceStage::kWorklist; - coalesce_worklist_.push(opportunity); - } - } -} - -void ColoringIteration::PruneNode(InterferenceNode* node) { - DCHECK_NE(node->stage, NodeStage::kPruned); - DCHECK(!node->IsPrecolored()); - node->stage = NodeStage::kPruned; - pruned_nodes_.push(node); - - for (InterferenceNode* adj : node->GetAdjacentNodes()) { - DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes"; - - if (adj->IsPrecolored()) { - // No effect on pre-colored nodes; they're never pruned. - } else { - // Remove the interference. - bool was_high_degree = IsHighDegreeNode(adj, num_regs_); - DCHECK(adj->ContainsInterference(node)) - << "Missing reflexive interference from non-fixed node"; - adj->RemoveInterference(node); - - // Handle transitions from high degree to low degree. - if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) { - EnableCoalesceOpportunities(adj); - for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) { - EnableCoalesceOpportunities(adj_adj); - } - - DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist); - if (adj->IsMoveRelated()) { - adj->stage = NodeStage::kFreezeWorklist; - freeze_worklist_.push_back(adj); - } else { - adj->stage = NodeStage::kSimplifyWorklist; - simplify_worklist_.push_back(adj); - } - } - } - } -} - -void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) { - if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) { - DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist); - node->stage = NodeStage::kSimplifyWorklist; - simplify_worklist_.push_back(node); - } -} - -void ColoringIteration::FreezeMoves(InterferenceNode* node) { - for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kDefunct) { - // Constrained moves should remain constrained, since they will not be considered - // during last-chance coalescing. - } else { - opportunity->stage = CoalesceStage::kInactive; - } - InterferenceNode* other = opportunity->node_a->GetAlias() == node - ? opportunity->node_b->GetAlias() - : opportunity->node_a->GetAlias(); - if (other != node && other->stage == NodeStage::kFreezeWorklist) { - DCHECK(IsLowDegreeNode(node, num_regs_)); - CheckTransitionFromFreezeWorklist(other); - } - } -} - -bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from, - InterferenceNode* into) { - if (!into->IsPrecolored()) { - // The uncolored heuristic will cover this case. - return false; - } - if (from->IsPair() || into->IsPair()) { - // TODO: Merging from a pair node is currently not supported, since fixed pair nodes - // are currently represented as two single fixed nodes in the graph, and `into` is - // only one of them. (We may lose the implicit connections to the second one in a merge.) - return false; - } - - // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`. 
- // Reasons an adjacent node `adj` can be "ok": - // (1) If `adj` is low degree, interference with `into` will not affect its existing - // colorable guarantee. (Notice that coalescing cannot increase its degree.) - // (2) If `adj` is pre-colored, it already interferes with `into`. See (3). - // (3) If there's already an interference with `into`, coalescing will not add interferences. - for (InterferenceNode* adj : from->GetAdjacentNodes()) { - if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) { - // Ok. - } else { - return false; - } - } - return true; -} - -bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from, - InterferenceNode* into) { - if (into->IsPrecolored()) { - // The pre-colored heuristic will handle this case. - return false; - } - - // Arbitrary cap to improve compile time. Tests show that this has negligible affect - // on generated code. - if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) { - return false; - } - - // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors - // of high degree. (Low degree neighbors can be ignored, because they will eventually be - // pruned from the interference graph in the simplify stage.) - size_t high_degree_interferences = 0; - for (InterferenceNode* adj : from->GetAdjacentNodes()) { - if (IsHighDegreeNode(adj, num_regs_)) { - high_degree_interferences += from->EdgeWeightWith(adj); - } - } - for (InterferenceNode* adj : into->GetAdjacentNodes()) { - if (IsHighDegreeNode(adj, num_regs_)) { - if (from->ContainsInterference(adj)) { - // We've already counted this adjacent node. - // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that - // we should not have counted it at all. (This extends the textbook Briggs coalescing test, - // but remains conservative.) - if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) { - high_degree_interferences -= from->EdgeWeightWith(adj); - } - } else { - high_degree_interferences += into->EdgeWeightWith(adj); - } - } - } - - return high_degree_interferences < num_regs_; -} - -void ColoringIteration::Combine(InterferenceNode* from, - InterferenceNode* into) { - from->SetAlias(into); - - // Add interferences. - for (InterferenceNode* adj : from->GetAdjacentNodes()) { - bool was_low_degree = IsLowDegreeNode(adj, num_regs_); - AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false); - if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) { - // This is a (temporary) transition to a high degree node. Its degree will decrease again - // when we prune `from`, but it's best to be consistent about the current worklist. - adj->stage = NodeStage::kSpillWorklist; - spill_worklist_.push(adj); - } - } - - // Add coalesce opportunities. - for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) { - if (opportunity->stage != CoalesceStage::kDefunct) { - into->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); - } - } - EnableCoalesceOpportunities(from); - - // Prune and update worklists. - PruneNode(from); - if (IsLowDegreeNode(into, num_regs_)) { - // Coalesce(...) takes care of checking for a transition to the simplify worklist. - DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist); - } else if (into->stage == NodeStage::kFreezeWorklist) { - // This is a transition to a high degree node. 
- into->stage = NodeStage::kSpillWorklist; - spill_worklist_.push(into); - } else { - DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored); - } -} - -void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) { - InterferenceNode* from = opportunity->node_a->GetAlias(); - InterferenceNode* into = opportunity->node_b->GetAlias(); - DCHECK_NE(from->stage, NodeStage::kPruned); - DCHECK_NE(into->stage, NodeStage::kPruned); - - if (from->IsPrecolored()) { - // If we have one pre-colored node, make sure it's the `into` node. - std::swap(from, into); - } - - if (from == into) { - // These nodes have already been coalesced. - opportunity->stage = CoalesceStage::kDefunct; - CheckTransitionFromFreezeWorklist(from); - } else if (from->IsPrecolored() || from->ContainsInterference(into)) { - // These nodes interfere. - opportunity->stage = CoalesceStage::kDefunct; - CheckTransitionFromFreezeWorklist(from); - CheckTransitionFromFreezeWorklist(into); - } else if (PrecoloredHeuristic(from, into) - || UncoloredHeuristic(from, into)) { - // We can coalesce these nodes. - opportunity->stage = CoalesceStage::kDefunct; - Combine(from, into); - CheckTransitionFromFreezeWorklist(into); - } else { - // We cannot coalesce, but we may be able to later. - opportunity->stage = CoalesceStage::kActive; - } -} - -// Build a mask with a bit set for each register assigned to some -// interval in `intervals`. -template <typename Container> -static std::bitset<kMaxNumRegs> BuildConflictMask(const Container& intervals) { - std::bitset<kMaxNumRegs> conflict_mask; - for (InterferenceNode* adjacent : intervals) { - LiveInterval* conflicting = adjacent->GetInterval(); - if (conflicting->HasRegister()) { - conflict_mask.set(conflicting->GetRegister()); - if (conflicting->HasHighInterval()) { - DCHECK(conflicting->GetHighInterval()->HasRegister()); - conflict_mask.set(conflicting->GetHighInterval()->GetRegister()); - } - } else { - DCHECK(!conflicting->HasHighInterval() - || !conflicting->GetHighInterval()->HasRegister()); - } - } - return conflict_mask; -} - -bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) { - return processing_core_regs - ? !codegen_->IsCoreCalleeSaveRegister(reg) - : !codegen_->IsFloatingPointCalleeSaveRegister(reg); -} - -static bool RegisterIsAligned(size_t reg) { - return reg % 2 == 0; -} - -static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) { - // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit. - // Note that CTZ is undefined if all bits are 0, so we special-case it. - return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong()); -} - -bool ColoringIteration::ColorInterferenceGraph() { - DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small"; - ScopedArenaVector<LiveInterval*> colored_intervals( - allocator_->Adapter(kArenaAllocRegisterAllocator)); - bool successful = true; - - while (!pruned_nodes_.empty()) { - InterferenceNode* node = pruned_nodes_.top(); - pruned_nodes_.pop(); - LiveInterval* interval = node->GetInterval(); - size_t reg = 0; - - InterferenceNode* alias = node->GetAlias(); - if (alias != node) { - // This node was coalesced with another. 
- LiveInterval* alias_interval = alias->GetInterval(); - if (alias_interval->HasRegister()) { - reg = alias_interval->GetRegister(); - DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg]) - << "This node conflicts with the register it was coalesced with"; - } else { - DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " " - << "Move coalescing was not conservative, causing a node to be coalesced " - << "with another node that could not be colored"; - if (interval->RequiresRegister()) { - successful = false; - } - } - } else { - // Search for free register(s). - std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes()); - if (interval->HasHighInterval()) { - // Note that the graph coloring allocator assumes that pair intervals are aligned here, - // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we - // change the alignment requirements here, we will have to update the algorithm (e.g., - // be more conservative about the weight of edges adjacent to pair nodes.) - while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) { - reg += 2; - } - - // Try to use a caller-save register first. - for (size_t i = 0; i < num_regs_ - 1; i += 2) { - bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_); - bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_); - if (!conflict_mask[i] && !conflict_mask[i + 1]) { - if (low_caller_save && high_caller_save) { - reg = i; - break; - } else if (low_caller_save || high_caller_save) { - reg = i; - // Keep looking to try to get both parts in caller-save registers. - } - } - } - } else { - // Not a pair interval. - reg = FindFirstZeroInConflictMask(conflict_mask); - - // Try to use caller-save registers first. - for (size_t i = 0; i < num_regs_; ++i) { - if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) { - reg = i; - break; - } - } - } - - // Last-chance coalescing. - for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kDefunct) { - continue; - } - LiveInterval* other_interval = opportunity->node_a->GetAlias() == node - ? opportunity->node_b->GetAlias()->GetInterval() - : opportunity->node_a->GetAlias()->GetInterval(); - if (other_interval->HasRegister()) { - size_t coalesce_register = other_interval->GetRegister(); - if (interval->HasHighInterval()) { - if (!conflict_mask[coalesce_register] && - !conflict_mask[coalesce_register + 1] && - RegisterIsAligned(coalesce_register)) { - reg = coalesce_register; - break; - } - } else if (!conflict_mask[coalesce_register]) { - reg = coalesce_register; - break; - } - } - } - } - - if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) { - // Assign register. - DCHECK(!interval->HasRegister()); - interval->SetRegister(reg); - colored_intervals.push_back(interval); - if (interval->HasHighInterval()) { - DCHECK(!interval->GetHighInterval()->HasRegister()); - interval->GetHighInterval()->SetRegister(reg + 1); - colored_intervals.push_back(interval->GetHighInterval()); - } - } else if (interval->RequiresRegister()) { - // The interference graph is too dense to color. Make it sparser by - // splitting this live interval. - successful = false; - register_allocator_->SplitAtRegisterUses(interval); - // We continue coloring, because there may be additional intervals that cannot - // be colored, and that we should split. - } else { - // Spill. 
- node->SetNeedsSpillSlot(); - } - } - - // If unsuccessful, reset all register assignments. - if (!successful) { - for (LiveInterval* interval : colored_intervals) { - interval->ClearRegister(); - } - } - - return successful; -} - -void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes) { - // The register allocation resolver will organize the stack based on value type, - // so we assign stack slots for each value type separately. - ScopedArenaAllocator allocator(allocator_->GetArenaStack()); - ScopedArenaAllocatorAdapter<void> adapter = allocator.Adapter(kArenaAllocRegisterAllocator); - ScopedArenaVector<LiveInterval*> double_intervals(adapter); - ScopedArenaVector<LiveInterval*> long_intervals(adapter); - ScopedArenaVector<LiveInterval*> float_intervals(adapter); - ScopedArenaVector<LiveInterval*> int_intervals(adapter); - - // The set of parent intervals already handled. - ScopedArenaSet<LiveInterval*> seen(adapter); - - // Find nodes that need spill slots. - for (InterferenceNode* node : nodes) { - if (!node->NeedsSpillSlot()) { - continue; - } - - LiveInterval* parent = node->GetInterval()->GetParent(); - if (seen.find(parent) != seen.end()) { - // We've already handled this interval. - // This can happen if multiple siblings of the same interval request a stack slot. - continue; - } - seen.insert(parent); - - HInstruction* defined_by = parent->GetDefinedBy(); - if (parent->HasSpillSlot()) { - // We already have a spill slot for this value that we can reuse. - } else if (defined_by->IsParameterValue()) { - // Parameters already have a stack slot. - parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue())); - } else if (defined_by->IsCurrentMethod()) { - // The current method is always at stack slot 0. - parent->SetSpillSlot(0); - } else if (defined_by->IsConstant()) { - // Constants don't need a spill slot. - } else { - // We need to find a spill slot for this interval. Place it in the correct - // worklist to be processed later. - switch (node->GetInterval()->GetType()) { - case DataType::Type::kFloat64: - double_intervals.push_back(parent); - break; - case DataType::Type::kInt64: - long_intervals.push_back(parent); - break; - case DataType::Type::kFloat32: - float_intervals.push_back(parent); - break; - case DataType::Type::kReference: - case DataType::Type::kInt32: - case DataType::Type::kUint16: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kBool: - case DataType::Type::kInt16: - int_intervals.push_back(parent); - break; - case DataType::Type::kUint32: - case DataType::Type::kUint64: - case DataType::Type::kVoid: - LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType(); - UNREACHABLE(); - } - } - } - - // Color spill slots for each value type. - ColorSpillSlots(ArrayRef<LiveInterval* const>(double_intervals), &num_double_spill_slots_); - ColorSpillSlots(ArrayRef<LiveInterval* const>(long_intervals), &num_long_spill_slots_); - ColorSpillSlots(ArrayRef<LiveInterval* const>(float_intervals), &num_float_spill_slots_); - ColorSpillSlots(ArrayRef<LiveInterval* const>(int_intervals), &num_int_spill_slots_); -} - -void RegisterAllocatorGraphColor::ColorSpillSlots(ArrayRef<LiveInterval* const> intervals, - /* out */ size_t* num_stack_slots_used) { - // We cannot use the original interference graph here because spill slots are assigned to - // all of the siblings of an interval, whereas an interference node represents only a single - // sibling. 
So, we assign spill slots linear-scan-style by sorting all the interval endpoints - // by position, and assigning the lowest spill slot available when we encounter an interval - // beginning. We ignore lifetime holes for simplicity. - ScopedArenaAllocator allocator(allocator_->GetArenaStack()); - ScopedArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints( - allocator.Adapter(kArenaAllocRegisterAllocator)); - - for (LiveInterval* parent_interval : intervals) { - DCHECK(parent_interval->IsParent()); - DCHECK(!parent_interval->HasSpillSlot()); - size_t start = parent_interval->GetStart(); - size_t end = parent_interval->GetLastSibling()->GetEnd(); - DCHECK_LT(start, end); - interval_endpoints.push_back(std::make_tuple(start, true, parent_interval)); - interval_endpoints.push_back(std::make_tuple(end, false, parent_interval)); - } - - // Sort by position. - // We explicitly ignore the third entry of each tuple (the interval pointer) in order - // to maintain determinism. - std::sort(interval_endpoints.begin(), interval_endpoints.end(), - [] (const std::tuple<size_t, bool, LiveInterval*>& lhs, - const std::tuple<size_t, bool, LiveInterval*>& rhs) { - return std::tie(std::get<0>(lhs), std::get<1>(lhs)) - < std::tie(std::get<0>(rhs), std::get<1>(rhs)); - }); - - ArenaBitVector taken(&allocator, 0, true, kArenaAllocRegisterAllocator); - for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) { - // Extract information from the current tuple. - LiveInterval* parent_interval; - bool is_interval_beginning; - size_t position; - std::tie(position, is_interval_beginning, parent_interval) = *it; - size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded(); - - if (is_interval_beginning) { - DCHECK(!parent_interval->HasSpillSlot()); - DCHECK_EQ(position, parent_interval->GetStart()); - - // Find first available free stack slot(s). - size_t slot = 0; - for (; ; ++slot) { - bool found = true; - for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { - if (taken.IsBitSet(s)) { - found = false; - break; // failure - } - } - if (found) { - break; // success - } - } - - parent_interval->SetSpillSlot(slot); - - *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed); - if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) { - // The parallel move resolver requires that there be an even number of spill slots - // allocated for pair value types. - ++(*num_stack_slots_used); - } - - for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { - taken.SetBit(s); - } - } else { - DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd()); - DCHECK(parent_interval->HasSpillSlot()); - - // Free up the stack slot(s) used by this interval. - size_t slot = parent_interval->GetSpillSlot(); - for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { - DCHECK(taken.IsBitSet(s)); - taken.ClearBit(s); - } - } - } - DCHECK_EQ(taken.NumSetBits(), 0u); -} - -} // namespace art diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h deleted file mode 100644 index 0e10152049..0000000000 --- a/compiler/optimizing/register_allocator_graph_color.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ -#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ - -#include "arch/instruction_set.h" -#include "base/arena_object.h" -#include "base/array_ref.h" -#include "base/macros.h" -#include "base/scoped_arena_containers.h" -#include "register_allocator.h" - -namespace art HIDDEN { - -class CodeGenerator; -class HBasicBlock; -class HGraph; -class HInstruction; -class HParallelMove; -class Location; -class SsaLivenessAnalysis; -class InterferenceNode; -struct CoalesceOpportunity; -enum class CoalesceKind; - -/** - * A graph coloring register allocator. - * - * The algorithm proceeds as follows: - * (1) Build an interference graph, where nodes represent live intervals, and edges represent - * interferences between two intervals. Coloring this graph with k colors is isomorphic to - * finding a valid register assignment with k registers. - * (2) To color the graph, first prune all nodes with degree less than k, since these nodes are - * guaranteed a color. (No matter how we color their adjacent nodes, we can give them a - * different color.) As we prune nodes from the graph, more nodes may drop below degree k, - * enabling further pruning. The key is to maintain the pruning order in a stack, so that we - * can color the nodes in the reverse order. - * When there are no more nodes with degree less than k, we start pruning alternate nodes based - * on heuristics. Since these nodes are not guaranteed a color, we are careful to - * prioritize nodes that require a register. We also prioritize short intervals, because - * short intervals cannot be split very much if coloring fails (see below). "Prioritizing" - * a node amounts to pruning it later, since it will have fewer interferences if we prune other - * nodes first. - * (3) We color nodes in the reverse order in which we pruned them. If we cannot assign - * a node a color, we do one of two things: - * - If the node requires a register, we consider the current coloring attempt a failure. - * However, we split the node's live interval in order to make the interference graph - * sparser, so that future coloring attempts may succeed. - * - If the node does not require a register, we simply assign it a location on the stack. - * - * If iterative move coalescing is enabled, the algorithm also attempts to conservatively - * combine nodes in the graph that would prefer to have the same color. (For example, the output - * of a phi instruction would prefer to have the same register as at least one of its inputs.) - * There are several additional steps involved with this: - * - We look for coalesce opportunities by examining each live interval, a step similar to that - * used by linear scan when looking for register hints. - * - When pruning the graph, we maintain a worklist of coalesce opportunities, as well as a worklist - * of low degree nodes that have associated coalesce opportunities. Only when we run out of - * coalesce opportunities do we start pruning coalesce-associated nodes. 
- * - When pruning a node, if any nodes transition from high degree to low degree, we add - * associated coalesce opportunities to the worklist, since these opportunities may now succeed. - * - Whether two nodes can be combined is decided by two different heuristics--one used when - * coalescing uncolored nodes, and one used for coalescing an uncolored node with a colored node. - * It is vital that we only combine two nodes if the node that remains is guaranteed to receive - * a color. This is because additionally spilling is more costly than failing to coalesce. - * - Even if nodes are not coalesced while pruning, we keep the coalesce opportunities around - * to be used as last-chance register hints when coloring. If nothing else, we try to use - * caller-save registers before callee-save registers. - * - * A good reference for graph coloring register allocation is - * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition). - */ -class RegisterAllocatorGraphColor : public RegisterAllocator { - public: - RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator, - CodeGenerator* codegen, - const SsaLivenessAnalysis& analysis, - bool iterative_move_coalescing = true); - ~RegisterAllocatorGraphColor() override; - - void AllocateRegisters() override; - - bool Validate(bool log_fatal_on_failure) override; - - private: - // Collect all intervals and prepare for register allocation. - void ProcessInstructions(); - void ProcessInstruction(HInstruction* instruction); - - // If any inputs require specific registers, block those registers - // at the position of this instruction. - void CheckForFixedInputs(HInstruction* instruction); - - // If the output of an instruction requires a specific register, split - // the interval and assign the register to the first part. - void CheckForFixedOutput(HInstruction* instruction); - - // Add all applicable safepoints to a live interval. - // Currently depends on instruction processing order. - void AddSafepointsFor(HInstruction* instruction); - - // Collect all live intervals associated with the temporary locations - // needed by an instruction. - void CheckForTempLiveIntervals(HInstruction* instruction); - - // If a safe point is needed, add a synthesized interval to later record - // the number of live registers at this point. - void CheckForSafepoint(HInstruction* instruction); - - // Try to remove the SuspendCheck at function entry. Returns true if it was successful. - bool TryRemoveSuspendCheckEntry(HInstruction* instruction); - - // Split an interval, but only if `position` is inside of `interval`. - // Return either the new interval, or the original interval if not split. - static LiveInterval* TrySplit(LiveInterval* interval, size_t position); - - // To ensure every graph can be colored, split live intervals - // at their register defs and uses. This creates short intervals with low - // degree in the interference graph, which are prioritized during graph - // coloring. - void SplitAtRegisterUses(LiveInterval* interval); - - // If the given instruction is a catch phi, give it a spill slot. - void AllocateSpillSlotForCatchPhi(HInstruction* instruction); - - // Ensure that the given register cannot be allocated for a given range. 
- void BlockRegister(Location location, size_t start, size_t end); - void BlockRegisters(size_t start, size_t end, bool caller_save_only = false); - - bool IsCallerSave(size_t reg, bool processing_core_regs); - - // Assigns stack slots to a list of intervals, ensuring that interfering intervals are not - // assigned the same stack slot. - void ColorSpillSlots(ArrayRef<LiveInterval* const> nodes, /* out */ size_t* num_stack_slots_used); - - // Provide stack slots to nodes that need them. - void AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes); - - // Whether iterative move coalescing should be performed. Iterative move coalescing - // improves code quality, but increases compile time. - const bool iterative_move_coalescing_; - - // Live intervals, split by kind (core and floating point). - // These should not contain high intervals, as those are represented by - // the corresponding low interval throughout register allocation. - ScopedArenaVector<LiveInterval*> core_intervals_; - ScopedArenaVector<LiveInterval*> fp_intervals_; - - // Intervals for temporaries, saved for special handling in the resolution phase. - ScopedArenaVector<LiveInterval*> temp_intervals_; - - // Safepoints, saved for special handling while processing instructions. - ScopedArenaVector<HInstruction*> safepoints_; - - // Interference nodes representing specific registers. These are "pre-colored" nodes - // in the interference graph. - ScopedArenaVector<InterferenceNode*> physical_core_nodes_; - ScopedArenaVector<InterferenceNode*> physical_fp_nodes_; - - // Allocated stack slot counters. - size_t num_int_spill_slots_; - size_t num_double_spill_slots_; - size_t num_float_spill_slots_; - size_t num_long_spill_slots_; - size_t catch_phi_spill_slot_counter_; - - // Number of stack slots needed for the pointer to the current method. - // This is 1 for 32-bit architectures, and 2 for 64-bit architectures. - const size_t reserved_art_method_slots_; - - // Number of stack slots needed for outgoing arguments. 
- const size_t reserved_out_slots_; - - friend class ColoringIteration; - - DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor); -}; - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index fcdaa2d34f..ffa9937cc5 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -1208,8 +1208,7 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotForCatchPhi(HPhi* phi) { LiveInterval* interval = phi->GetLiveInterval(); HInstruction* previous_phi = phi->GetPrevious(); - DCHECK(previous_phi == nullptr || - previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) + DCHECK(previous_phi == nullptr || previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) << "Phis expected to be sorted by vreg number, so that equivalent phis are adjacent."; if (phi->IsVRegEquivalentOf(previous_phi)) { diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index d316aa5dc2..0d2d20682d 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -84,7 +84,8 @@ class RegisterAllocatorTest : public CommonCompilerTest, public OptimizingUnitTe TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {\ test_name(Strategy::kRegisterAllocatorLinearScan);\ }\ -TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\ +/* Note: Graph coloring register allocator has been removed, so the test is DISABLED. */ \ +TEST_F(RegisterAllocatorTest, DISABLED_##test_name##_GraphColor) {\ test_name(Strategy::kRegisterAllocatorGraphColor);\ } diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index 116f52605e..1cdc98a8be 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -490,9 +490,9 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition( DCHECK(instruction != nullptr); if (instruction->IsIf()) { - condition = instruction->AsIf()->InputAt(0)->AsCondition(); + condition = instruction->AsIf()->InputAt(0)->AsConditionOrNull(); } else if (instruction->IsSelect()) { - condition = instruction->AsSelect()->GetCondition()->AsCondition(); + condition = instruction->AsSelect()->GetCondition()->AsConditionOrNull(); } SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr; diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index 3f931c4c49..53ad2b12c0 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -610,7 +610,7 @@ void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifte } } -void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitIntermediateAddress([[maybe_unused]] HIntermediateAddress*) { // Although the code generated is a simple `add` instruction, we found through empirical results // that spacing it from its use in memory accesses was beneficial. 
last_visited_internal_latency_ = kArmNopLatency; @@ -618,11 +618,11 @@ void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* } void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex( - HIntermediateAddressIndex* ATTRIBUTE_UNUSED) { + [[maybe_unused]] HIntermediateAddressIndex*) { UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM"; } -void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate([[maybe_unused]] HMultiplyAccumulate*) { last_visited_latency_ = kArmMulIntegerLatency; } @@ -806,7 +806,7 @@ void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) { } } -void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitBoundsCheck([[maybe_unused]] HBoundsCheck*) { last_visited_internal_latency_ = kArmIntegerOpLatency; // Users do not use any data results. last_visited_latency_ = 0; @@ -866,22 +866,22 @@ void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instr HandleFieldSetLatencies(instruction, instruction->GetFieldInfo()); } -void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitInstanceOf([[maybe_unused]] HInstanceOf*) { last_visited_internal_latency_ = kArmCallInternalLatency; last_visited_latency_ = kArmIntegerOpLatency; } -void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitInvoke([[maybe_unused]] HInvoke*) { last_visited_internal_latency_ = kArmCallInternalLatency; last_visited_latency_ = kArmCallLatency; } -void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitLoadString([[maybe_unused]] HLoadString*) { last_visited_internal_latency_ = kArmLoadStringInternalLatency; last_visited_latency_ = kArmMemoryLoadLatency; } -void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM::VisitNewArray([[maybe_unused]] HNewArray*) { last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency; last_visited_latency_ = kArmCallLatency; } diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h index 0da21c187f..cedc12a2be 100644 --- a/compiler/optimizing/scheduler_arm.h +++ b/compiler/optimizing/scheduler_arm.h @@ -53,7 +53,7 @@ class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor { : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {} // Default visitor for instructions not handled specifically below. 
- void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { + void VisitInstruction([[maybe_unused]] HInstruction*) override { last_visited_latency_ = kArmIntegerOpLatency; } diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc index 3071afd951..5113cf446d 100644 --- a/compiler/optimizing/scheduler_arm64.cc +++ b/compiler/optimizing/scheduler_arm64.cc @@ -30,30 +30,30 @@ void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr } void SchedulingLatencyVisitorARM64::VisitBitwiseNegatedRight( - HBitwiseNegatedRight* ATTRIBUTE_UNUSED) { + [[maybe_unused]] HBitwiseNegatedRight*) { last_visited_latency_ = kArm64IntegerOpLatency; } void SchedulingLatencyVisitorARM64::VisitDataProcWithShifterOp( - HDataProcWithShifterOp* ATTRIBUTE_UNUSED) { + [[maybe_unused]] HDataProcWithShifterOp*) { last_visited_latency_ = kArm64DataProcWithShifterOpLatency; } void SchedulingLatencyVisitorARM64::VisitIntermediateAddress( - HIntermediateAddress* ATTRIBUTE_UNUSED) { + [[maybe_unused]] HIntermediateAddress*) { // Although the code generated is a simple `add` instruction, we found through empirical results // that spacing it from its use in memory accesses was beneficial. last_visited_latency_ = kArm64IntegerOpLatency + 2; } void SchedulingLatencyVisitorARM64::VisitIntermediateAddressIndex( - HIntermediateAddressIndex* instr ATTRIBUTE_UNUSED) { + [[maybe_unused]] HIntermediateAddressIndex* instr) { // Although the code generated is a simple `add` instruction, we found through empirical results // that spacing it from its use in memory accesses was beneficial. last_visited_latency_ = kArm64DataProcWithShifterOpLatency + 2; } -void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate([[maybe_unused]] HMultiplyAccumulate*) { last_visited_latency_ = kArm64MulIntegerLatency; } @@ -65,15 +65,15 @@ void SchedulingLatencyVisitorARM64::VisitArrayGet(HArrayGet* instruction) { last_visited_latency_ = kArm64MemoryLoadLatency; } -void SchedulingLatencyVisitorARM64::VisitArrayLength(HArrayLength* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitArrayLength([[maybe_unused]] HArrayLength*) { last_visited_latency_ = kArm64MemoryLoadLatency; } -void SchedulingLatencyVisitorARM64::VisitArraySet(HArraySet* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitArraySet([[maybe_unused]] HArraySet*) { last_visited_latency_ = kArm64MemoryStoreLatency; } -void SchedulingLatencyVisitorARM64::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitBoundsCheck([[maybe_unused]] HBoundsCheck*) { last_visited_internal_latency_ = kArm64IntegerOpLatency; // Users do not use any data results. 
last_visited_latency_ = 0; @@ -113,21 +113,21 @@ void SchedulingLatencyVisitorARM64::VisitDiv(HDiv* instr) { } } -void SchedulingLatencyVisitorARM64::VisitInstanceFieldGet(HInstanceFieldGet* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitInstanceFieldGet([[maybe_unused]] HInstanceFieldGet*) { last_visited_latency_ = kArm64MemoryLoadLatency; } -void SchedulingLatencyVisitorARM64::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitInstanceOf([[maybe_unused]] HInstanceOf*) { last_visited_internal_latency_ = kArm64CallInternalLatency; last_visited_latency_ = kArm64IntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitInvoke([[maybe_unused]] HInvoke*) { last_visited_internal_latency_ = kArm64CallInternalLatency; last_visited_latency_ = kArm64CallLatency; } -void SchedulingLatencyVisitorARM64::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitLoadString([[maybe_unused]] HLoadString*) { last_visited_internal_latency_ = kArm64LoadStringInternalLatency; last_visited_latency_ = kArm64MemoryLoadLatency; } @@ -138,7 +138,7 @@ void SchedulingLatencyVisitorARM64::VisitMul(HMul* instr) { : kArm64MulIntegerLatency; } -void SchedulingLatencyVisitorARM64::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitNewArray([[maybe_unused]] HNewArray*) { last_visited_internal_latency_ = kArm64IntegerOpLatency + kArm64CallInternalLatency; last_visited_latency_ = kArm64CallLatency; } @@ -181,7 +181,7 @@ void SchedulingLatencyVisitorARM64::VisitRem(HRem* instruction) { } } -void SchedulingLatencyVisitorARM64::VisitStaticFieldGet(HStaticFieldGet* ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitStaticFieldGet([[maybe_unused]] HStaticFieldGet*) { last_visited_latency_ = kArm64MemoryLoadLatency; } @@ -211,7 +211,7 @@ void SchedulingLatencyVisitorARM64::HandleSimpleArithmeticSIMD(HVecOperation *in } void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar( - HVecReplicateScalar* instr ATTRIBUTE_UNUSED) { + [[maybe_unused]] HVecReplicateScalar* instr) { last_visited_latency_ = kArm64SIMDReplicateOpLatency; } @@ -223,7 +223,7 @@ void SchedulingLatencyVisitorARM64::VisitVecReduce(HVecReduce* instr) { HandleSimpleArithmeticSIMD(instr); } -void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecCnv([[maybe_unused]] HVecCnv* instr) { last_visited_latency_ = kArm64SIMDTypeConversionInt2FPLatency; } @@ -279,19 +279,19 @@ void SchedulingLatencyVisitorARM64::VisitVecMax(HVecMax* instr) { HandleSimpleArithmeticSIMD(instr); } -void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecAnd([[maybe_unused]] HVecAnd* instr) { last_visited_latency_ = kArm64SIMDIntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecAndNot([[maybe_unused]] HVecAndNot* instr) { last_visited_latency_ = kArm64SIMDIntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecOr([[maybe_unused]] HVecOr* instr) { last_visited_latency_ = kArm64SIMDIntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitVecXor(HVecXor* instr ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::VisitVecXor([[maybe_unused]] 
HVecXor* instr) { last_visited_latency_ = kArm64SIMDIntegerOpLatency; } @@ -312,13 +312,12 @@ void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) { } void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate( - HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) { + [[maybe_unused]] HVecMultiplyAccumulate* instr) { last_visited_latency_ = kArm64SIMDMulIntegerLatency; } -void SchedulingLatencyVisitorARM64::HandleVecAddress( - HVecMemoryOperation* instruction, - size_t size ATTRIBUTE_UNUSED) { +void SchedulingLatencyVisitorARM64::HandleVecAddress(HVecMemoryOperation* instruction, + [[maybe_unused]] size_t size) { HInstruction* index = instruction->InputAt(1); if (!index->IsConstant()) { last_visited_internal_latency_ += kArm64DataProcWithShifterOpLatency; diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index ec41577e9d..7ce00e00ab 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -59,7 +59,7 @@ static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10; class SchedulingLatencyVisitorARM64 final : public SchedulingLatencyVisitor { public: // Default visitor for instructions not handled specifically below. - void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { + void VisitInstruction([[maybe_unused]] HInstruction*) override { last_visited_latency_ = kArm64IntegerOpLatency; } diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index 6a10440d11..07065efbb7 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -46,8 +46,7 @@ static bool IsSimpleBlock(HBasicBlock* block) { } else if (instruction->CanBeMoved() && !instruction->HasSideEffects() && !instruction->CanThrow()) { - if (instruction->IsSelect() && - instruction->AsSelect()->GetCondition()->GetBlock() == block) { + if (instruction->IsSelect() && instruction->AsSelect()->GetCondition()->GetBlock() == block) { // Count one HCondition and HSelect in the same block as a single instruction. // This enables finding nested selects. continue; diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index a658252e69..2179bf50b5 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -604,7 +604,7 @@ GraphAnalysisResult SsaBuilder::BuildSsa() { */ HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) { // We place the floating point constant next to this constant. - HFloatConstant* result = constant->GetNext()->AsFloatConstant(); + HFloatConstant* result = constant->GetNext()->AsFloatConstantOrNull(); if (result == nullptr) { float value = bit_cast<float, int32_t>(constant->GetValue()); result = new (graph_->GetAllocator()) HFloatConstant(value); @@ -626,7 +626,7 @@ HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) { */ HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) { // We place the floating point constant next to this constant. - HDoubleConstant* result = constant->GetNext()->AsDoubleConstant(); + HDoubleConstant* result = constant->GetNext()->AsDoubleConstantOrNull(); if (result == nullptr) { double value = bit_cast<double, int64_t>(constant->GetValue()); result = new (graph_->GetAllocator()) HDoubleConstant(value); @@ -652,16 +652,16 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, DataType:: // We place the floating point /reference phi next to this phi. 
HInstruction* next = phi->GetNext(); - if (next != nullptr - && next->AsPhi()->GetRegNumber() == phi->GetRegNumber() - && next->GetType() != type) { + if (next != nullptr && + next->AsPhi()->GetRegNumber() == phi->GetRegNumber() && + next->GetType() != type) { // Move to the next phi to see if it is the one we are looking for. next = next->GetNext(); } - if (next == nullptr - || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) - || (next->GetType() != type)) { + if (next == nullptr || + (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) || + (next->GetType() != type)) { ArenaAllocator* allocator = graph_->GetAllocator(); HInputsRef inputs = phi->GetInputs(); HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), inputs.size(), type); diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index 2df0f34c7d..18c945381d 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -31,6 +31,7 @@ namespace art HIDDEN { class SsaLivenessAnalysisTest : public OptimizingUnitTest { protected: void SetUp() override { + TEST_SETUP_DISABLED_FOR_RISCV64(); OptimizingUnitTest::SetUp(); graph_ = CreateGraph(); compiler_options_ = CommonCompilerTest::CreateCompilerOptions(kRuntimeISA, "default"); @@ -42,6 +43,11 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest { graph_->SetEntryBlock(entry_); } + void TearDown() override { + TEST_TEARDOWN_DISABLED_FOR_RISCV64(); + OptimizingUnitTest::TearDown(); + } + protected: HBasicBlock* CreateSuccessor(HBasicBlock* block) { HGraph* graph = block->GetGraph(); @@ -58,6 +64,7 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest { }; TEST_F(SsaLivenessAnalysisTest, TestReturnArg) { + TEST_DISABLED_FOR_RISCV64(); HInstruction* arg = new (GetAllocator()) HParameterValue( graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); entry_->AddInstruction(arg); @@ -78,6 +85,7 @@ TEST_F(SsaLivenessAnalysisTest, TestReturnArg) { } TEST_F(SsaLivenessAnalysisTest, TestAput) { + TEST_DISABLED_FOR_RISCV64(); HInstruction* array = new (GetAllocator()) HParameterValue( graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); HInstruction* index = new (GetAllocator()) HParameterValue( @@ -147,6 +155,7 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) { } TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { + TEST_DISABLED_FOR_RISCV64(); HInstruction* array = new (GetAllocator()) HParameterValue( graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); HInstruction* index = new (GetAllocator()) HParameterValue( diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index ce343dffec..1d9be3956a 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -76,7 +76,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() { HPhi* phi = worklist.back(); worklist.pop_back(); for (HInstruction* raw_input : phi->GetInputs()) { - HPhi* input = raw_input->AsPhi(); + HPhi* input = raw_input->AsPhiOrNull(); if (input != nullptr && input->IsDead()) { // Input is a dead phi. Revive it and add to the worklist. We make sure // that the phi was not dead initially (see definition of `initially_live`). 
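Two mechanical idioms recur throughout the hunks above: the GCC-style `ATTRIBUTE_UNUSED` macro is replaced by the standard C++17 `[[maybe_unused]]` attribute, and bare downcasts such as `AsPhi()` / `AsFloatConstant()` become the explicitly nullable `AsPhiOrNull()` / `AsFloatConstantOrNull()`, whose result is tested before use. The sketch below illustrates both idioms with simplified stand-in types, not the real ART node classes:

// Minimal illustration of the two idioms above (hypothetical stand-in types).
struct HInstruction {
  virtual ~HInstruction() = default;
  virtual bool IsPhi() const { return false; }
};

struct HPhi : HInstruction {
  bool IsPhi() const override { return true; }
};

// `As*OrNull()` style: returns nullptr when the instruction is not of the
// requested kind, so callers must check before dereferencing.
HPhi* AsPhiOrNull(HInstruction* insn) {
  return insn->IsPhi() ? static_cast<HPhi*>(insn) : nullptr;
}

// `[[maybe_unused]]` replaces the old ATTRIBUTE_UNUSED macro on parameters
// that exist only to satisfy a visitor signature.
void VisitInstruction([[maybe_unused]] HInstruction* instruction) {
  // Fixed-cost default; the parameter is deliberately ignored.
}

void Example(HInstruction* raw_input) {
  HPhi* phi = AsPhiOrNull(raw_input);
  if (phi != nullptr) {
    // Only reached when the downcast succeeded.
  }
}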
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 1a368ed347..2ecda7610e 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -51,7 +51,8 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes, size_t fp_spill_mask, uint32_t num_dex_registers, bool baseline, - bool debuggable) { + bool debuggable, + bool has_should_deoptimize_flag) { DCHECK(!in_method_) << "Mismatched Begin/End calls"; in_method_ = true; DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called"; @@ -63,6 +64,7 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes, num_dex_registers_ = num_dex_registers; baseline_ = baseline; debuggable_ = debuggable; + has_should_deoptimize_flag_ = has_should_deoptimize_flag; if (kVerifyStackMaps) { dchecks_.emplace_back([=](const CodeInfo& code_info) { @@ -152,8 +154,10 @@ void StackMapStream::BeginStackMapEntry( // Create lambda method, which will be executed at the very end to verify data. // Parameters and local variables will be captured(stored) by the lambda "[=]". dchecks_.emplace_back([=](const CodeInfo& code_info) { + // The `native_pc_offset` may have been overridden using `SetStackMapNativePcOffset(.)`. + uint32_t final_native_pc_offset = GetStackMapNativePcOffset(stack_map_index); if (kind == StackMap::Kind::Default || kind == StackMap::Kind::OSR) { - StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, + StackMap stack_map = code_info.GetStackMapForNativePcOffset(final_native_pc_offset, instruction_set_); CHECK_EQ(stack_map.Row(), stack_map_index); } else if (kind == StackMap::Kind::Catch) { @@ -162,7 +166,7 @@ void StackMapStream::BeginStackMapEntry( CHECK_EQ(stack_map.Row(), stack_map_index); } StackMap stack_map = code_info.GetStackMapAt(stack_map_index); - CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), native_pc_offset); + CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), final_native_pc_offset); CHECK_EQ(stack_map.GetKind(), static_cast<uint32_t>(kind)); CHECK_EQ(stack_map.GetDexPc(), dex_pc); CHECK_EQ(code_info.GetRegisterMaskOf(stack_map), register_mask); @@ -374,10 +378,12 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() { DCHECK(in_stack_map_ == false) << "Mismatched Begin/End calls"; DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls"; - uint32_t flags = (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0; + uint32_t flags = 0; + flags |= (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0; flags |= baseline_ ? CodeInfo::kIsBaseline : 0; flags |= debuggable_ ? CodeInfo::kIsDebuggable : 0; - DCHECK_LE(flags, kVarintMax); // Ensure flags can be read directly as byte. + flags |= has_should_deoptimize_flag_ ? CodeInfo::kHasShouldDeoptimizeFlag : 0; + uint32_t bit_table_flags = 0; ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) { if (bit_table->size() != 0) { // Record which bit-tables are stored. @@ -409,6 +415,8 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() { CHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size()); CHECK_EQ(CodeInfo::HasInlineInfo(buffer.data()), inline_infos_.size() > 0); CHECK_EQ(CodeInfo::IsBaseline(buffer.data()), baseline_); + CHECK_EQ(CodeInfo::IsDebuggable(buffer.data()), debuggable_); + CHECK_EQ(CodeInfo::HasShouldDeoptimizeFlag(buffer.data()), has_should_deoptimize_flag_); // Verify all written data (usually only in debug builds). 
if (kVerifyStackMaps) { diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 643af2da94..f027850ce6 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -66,7 +66,8 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { size_t fp_spill_mask, uint32_t num_dex_registers, bool baseline, - bool debuggable); + bool debuggable, + bool has_should_deoptimize_flag = false); void EndMethod(size_t code_size); void BeginStackMapEntry( @@ -129,8 +130,9 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { uint32_t core_spill_mask_ = 0; uint32_t fp_spill_mask_ = 0; uint32_t num_dex_registers_ = 0; - bool baseline_; - bool debuggable_; + bool baseline_ = false; + bool debuggable_ = false; + bool has_should_deoptimize_flag_ = false; BitTableBuilder<StackMap> stack_maps_; BitTableBuilder<RegisterMask> register_masks_; BitmapTableBuilder stack_masks_; diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc index e266618980..d86869ce0f 100644 --- a/compiler/optimizing/x86_memory_gen.cc +++ b/compiler/optimizing/x86_memory_gen.cc @@ -33,7 +33,7 @@ class MemoryOperandVisitor final : public HGraphVisitor { private: void VisitBoundsCheck(HBoundsCheck* check) override { // Replace the length by the array itself, so that we can do compares to memory. - HArrayLength* array_len = check->InputAt(1)->AsArrayLength(); + HArrayLength* array_len = check->InputAt(1)->AsArrayLengthOrNull(); // We only want to replace an ArrayLength. if (array_len == nullptr) { diff --git a/compiler/trampolines/trampoline_compiler.cc b/compiler/trampolines/trampoline_compiler.cc index a122d3c9d3..d9f56629ef 100644 --- a/compiler/trampolines/trampoline_compiler.cc +++ b/compiler/trampolines/trampoline_compiler.cc @@ -28,6 +28,10 @@ #include "utils/arm64/assembler_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +#include "utils/riscv64/assembler_riscv64.h" +#endif + #ifdef ART_ENABLE_CODEGEN_x86 #include "utils/x86/assembler_x86.h" #endif @@ -57,9 +61,6 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline( ArmVIXLAssembler assembler(allocator); switch (abi) { - case kInterpreterAbi: // Thread* is first argument (R0) in interpreter ABI. - ___ Ldr(pc, MemOperand(r0, offset.Int32Value())); - break; case kJniAbi: { // Load via Thread* held in JNIEnv* in first argument (R0). vixl::aarch32::UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); const vixl::aarch32::Register temp_reg = temps.Acquire(); @@ -78,7 +79,7 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline( size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(entry_stub->data(), entry_stub->size()); - __ FinalizeInstructions(code); + __ CopyInstructions(code); return std::move(entry_stub); } @@ -95,11 +96,6 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline( Arm64Assembler assembler(allocator); switch (abi) { - case kInterpreterAbi: // Thread* is first argument (X0) in interpreter ABI. - __ JumpTo(Arm64ManagedRegister::FromXRegister(X0), Offset(offset.Int32Value()), - Arm64ManagedRegister::FromXRegister(IP1)); - - break; case kJniAbi: // Load via Thread* held in JNIEnv* in first argument (X0). 
__ LoadRawPtr(Arm64ManagedRegister::FromXRegister(IP1), Arm64ManagedRegister::FromXRegister(X0), @@ -120,13 +116,47 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline( size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(entry_stub->data(), entry_stub->size()); - __ FinalizeInstructions(code); + __ CopyInstructions(code); return std::move(entry_stub); } } // namespace arm64 #endif // ART_ENABLE_CODEGEN_arm64 +#ifdef ART_ENABLE_CODEGEN_riscv64 +namespace riscv64 { +static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* allocator, + EntryPointCallingConvention abi, + ThreadOffset64 offset) { + Riscv64Assembler assembler(allocator); + ScratchRegisterScope srs(&assembler); + XRegister tmp = srs.AllocateXRegister(); + + switch (abi) { + case kJniAbi: // Load via Thread* held in JNIEnv* in first argument (A0). + __ Loadd(tmp, + A0, + JNIEnvExt::SelfOffset(static_cast<size_t>(kRiscv64PointerSize)).Int32Value()); + __ Loadd(tmp, tmp, offset.Int32Value()); + __ Jr(tmp); + break; + case kQuickAbi: // TR holds Thread*. + __ Loadd(tmp, TR, offset.Int32Value()); + __ Jr(tmp); + break; + } + + __ FinalizeCode(); + size_t cs = __ CodeSize(); + std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); + MemoryRegion code(entry_stub->data(), entry_stub->size()); + __ CopyInstructions(code); + + return std::move(entry_stub); +} +} // namespace riscv64 +#endif // ART_ENABLE_CODEGEN_riscv64 + #ifdef ART_ENABLE_CODEGEN_x86 namespace x86 { static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocator* allocator, @@ -141,7 +171,7 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocat size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(entry_stub->data(), entry_stub->size()); - __ FinalizeInstructions(code); + __ CopyInstructions(code); return std::move(entry_stub); } @@ -162,7 +192,7 @@ static std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline(ArenaAllocat size_t cs = __ CodeSize(); std::unique_ptr<std::vector<uint8_t>> entry_stub(new std::vector<uint8_t>(cs)); MemoryRegion code(entry_stub->data(), entry_stub->size()); - __ FinalizeInstructions(code); + __ CopyInstructions(code); return std::move(entry_stub); } @@ -179,6 +209,10 @@ std::unique_ptr<const std::vector<uint8_t>> CreateTrampoline64(InstructionSet is case InstructionSet::kArm64: return arm64::CreateTrampoline(&allocator, abi, offset); #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case InstructionSet::kRiscv64: + return riscv64::CreateTrampoline(&allocator, abi, offset); +#endif #ifdef ART_ENABLE_CODEGEN_x86_64 case InstructionSet::kX86_64: return x86_64::CreateTrampoline(&allocator, offset); diff --git a/compiler/trampolines/trampoline_compiler.h b/compiler/trampolines/trampoline_compiler.h index 32e35ae1d6..25b715fab0 100644 --- a/compiler/trampolines/trampoline_compiler.h +++ b/compiler/trampolines/trampoline_compiler.h @@ -28,8 +28,6 @@ namespace art HIDDEN { enum EntryPointCallingConvention { - // ABI of invocations to a method's interpreter entry point. - kInterpreterAbi, // ABI of calls to a method's native code, only used for native methods. kJniAbi, // ABI of calls to a method's quick code entry point. 
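Each trampoline writer above follows the same shape regardless of ISA: emit the ABI-specific load-and-jump, call FinalizeCode(), size a buffer from CodeSize(), and copy the bytes out with the renamed CopyInstructions() (which, per the assembler changes further down, no longer applies fixups itself). Below is a minimal sketch of that two-phase flow using a toy buffer type rather than the real AssemblerBuffer/MemoryRegion classes:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Toy stand-in for the assembler interface used above: emit bytes, finalize
// (apply fixups in the assembler's own buffer), then copy the finished bytes out.
class ToyAssembler {
 public:
  void Emit8(uint8_t byte) { buffer_.push_back(byte); }

  // Mirrors FinalizeCode(): slow paths and fixups are handled here,
  // before anything leaves the assembler.
  void FinalizeCode() { /* fixups would be patched into buffer_ at this point */ }

  std::size_t CodeSize() const { return buffer_.size(); }

  // Mirrors the renamed CopyInstructions(): a plain copy, no patching.
  void CopyInstructions(uint8_t* region, std::size_t size) const {
    std::memcpy(region, buffer_.data(), std::min(size, buffer_.size()));
  }

 private:
  std::vector<uint8_t> buffer_;
};

// Usage pattern matching the trampoline writers above.
std::vector<uint8_t> BuildStub() {
  ToyAssembler assembler;
  assembler.Emit8(0x13);     // illustrative byte only (low byte of a RISC-V NOP)
  assembler.FinalizeCode();  // 1) finalize in place
  std::vector<uint8_t> stub(assembler.CodeSize());
  assembler.CopyInstructions(stub.data(), stub.size());  // 2) copy out
  return stub;
}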
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc index c7ca003530..d64de09501 100644 --- a/compiler/utils/arm/assembler_arm_vixl.cc +++ b/compiler/utils/arm/assembler_arm_vixl.cc @@ -52,7 +52,7 @@ const uint8_t* ArmVIXLAssembler::CodeBufferBaseAddress() const { return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>(); } -void ArmVIXLAssembler::FinalizeInstructions(const MemoryRegion& region) { +void ArmVIXLAssembler::CopyInstructions(const MemoryRegion& region) { // Copy the instructions from the buffer. MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize()); region.CopyFrom(0, from); diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h index 741119d7f7..50dc06fefc 100644 --- a/compiler/utils/arm/assembler_arm_vixl.h +++ b/compiler/utils/arm/assembler_arm_vixl.h @@ -173,6 +173,30 @@ class ArmVIXLMacroAssembler final : public vixl32::MacroAssembler { } } using MacroAssembler::Vmov; + + // TODO(b/281982421): Move the implementation of Mrrc to vixl and remove this implementation. + void Mrrc(vixl32::Register r1, vixl32::Register r2, int coproc, int opc1, int crm) { + // See ARM A-profile A32/T32 Instruction set architecture + // https://developer.arm.com/documentation/ddi0597/2022-09/Base-Instructions/MRRC--Move-to-two-general-purpose-registers-from-System-register- + CHECK(coproc == 15 || coproc == 14); + if (IsUsingT32()) { + uint32_t inst = (0b111011000101 << 20) | + (r2.GetCode() << 16) | + (r1.GetCode() << 12) | + (coproc << 8) | + (opc1 << 4) | + crm; + EmitT32_32(inst); + } else { + uint32_t inst = (0b000011000101 << 20) | + (r2.GetCode() << 16) | + (r1.GetCode() << 12) | + (coproc << 8) | + (opc1 << 4) | + crm; + EmitA32(inst); + } + } }; class ArmVIXLAssembler final : public Assembler { @@ -194,12 +218,12 @@ class ArmVIXLAssembler final : public Assembler { const uint8_t* CodeBufferBaseAddress() const override; // Copy instructions out of assembly buffer into the given region of memory. 
- void FinalizeInstructions(const MemoryRegion& region) override; + void CopyInstructions(const MemoryRegion& region) override; - void Bind(Label* label ATTRIBUTE_UNUSED) override { + void Bind([[maybe_unused]] Label* label) override { UNIMPLEMENTED(FATAL) << "Do not use Bind(Label*) for ARM"; } - void Jump(Label* label ATTRIBUTE_UNUSED) override { + void Jump([[maybe_unused]] Label* label) override { UNIMPLEMENTED(FATAL) << "Do not use Jump(Label*) for ARM"; } diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc index 54873454eb..7a887fa064 100644 --- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc +++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc @@ -344,13 +344,13 @@ void ArmVIXLJNIMacroAssembler::StoreStackPointerToThread(ThreadOffset32 thr_offs } } -void ArmVIXLJNIMacroAssembler::SignExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED) { +void ArmVIXLJNIMacroAssembler::SignExtend([[maybe_unused]] ManagedRegister mreg, + [[maybe_unused]] size_t size) { UNIMPLEMENTED(FATAL) << "no sign extension necessary for arm"; } -void ArmVIXLJNIMacroAssembler::ZeroExtend(ManagedRegister mreg ATTRIBUTE_UNUSED, - size_t size ATTRIBUTE_UNUSED) { +void ArmVIXLJNIMacroAssembler::ZeroExtend([[maybe_unused]] ManagedRegister mreg, + [[maybe_unused]] size_t size) { UNIMPLEMENTED(FATAL) << "no zero extension necessary for arm"; } @@ -720,7 +720,7 @@ void ArmVIXLJNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, void ArmVIXLJNIMacroAssembler::Move(ManagedRegister mdst, ManagedRegister msrc, - size_t size ATTRIBUTE_UNUSED) { + [[maybe_unused]] size_t size) { ArmManagedRegister dst = mdst.AsArm(); if (kIsDebugBuild) { // Check that the destination is not a scratch register. @@ -861,13 +861,13 @@ void ArmVIXLJNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister ___ Ldr(reg, MemOperand(reg)); } -void ArmVIXLJNIMacroAssembler::VerifyObject(ManagedRegister src ATTRIBUTE_UNUSED, - bool could_be_null ATTRIBUTE_UNUSED) { +void ArmVIXLJNIMacroAssembler::VerifyObject([[maybe_unused]] ManagedRegister src, + [[maybe_unused]] bool could_be_null) { // TODO: not validating references. } -void ArmVIXLJNIMacroAssembler::VerifyObject(FrameOffset src ATTRIBUTE_UNUSED, - bool could_be_null ATTRIBUTE_UNUSED) { +void ArmVIXLJNIMacroAssembler::VerifyObject([[maybe_unused]] FrameOffset src, + [[maybe_unused]] bool could_be_null) { // TODO: not validating references. } diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 26dce7c502..13acc7c852 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -79,7 +79,7 @@ const uint8_t* Arm64Assembler::CodeBufferBaseAddress() const { return vixl_masm_.GetBuffer().GetStartAddress<const uint8_t*>(); } -void Arm64Assembler::FinalizeInstructions(const MemoryRegion& region) { +void Arm64Assembler::CopyInstructions(const MemoryRegion& region) { // Copy the instructions from the buffer. 
MemoryRegion from(vixl_masm_.GetBuffer()->GetStartAddress<void*>(), CodeSize()); region.CopyFrom(0, from); diff --git a/compiler/utils/arm64/assembler_arm64.h b/compiler/utils/arm64/assembler_arm64.h index f8168903bd..ad6a8edadf 100644 --- a/compiler/utils/arm64/assembler_arm64.h +++ b/compiler/utils/arm64/assembler_arm64.h @@ -91,7 +91,7 @@ class Arm64Assembler final : public Assembler { const uint8_t* CodeBufferBaseAddress() const override; // Copy instructions out of assembly buffer into the given region of memory. - void FinalizeInstructions(const MemoryRegion& region) override; + void CopyInstructions(const MemoryRegion& region) override; void LoadRawPtr(ManagedRegister dest, ManagedRegister base, Offset offs); @@ -145,10 +145,10 @@ class Arm64Assembler final : public Assembler { // MaybeGenerateMarkingRegisterCheck and is passed to the BRK instruction. void GenerateMarkingRegisterCheck(vixl::aarch64::Register temp, int code = 0); - void Bind(Label* label ATTRIBUTE_UNUSED) override { + void Bind([[maybe_unused]] Label* label) override { UNIMPLEMENTED(FATAL) << "Do not use Bind(Label*) for ARM64"; } - void Jump(Label* label ATTRIBUTE_UNUSED) override { + void Jump([[maybe_unused]] Label* label) override { UNIMPLEMENTED(FATAL) << "Do not use Jump(Label*) for ARM64"; } diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc index 9e9f122cf6..c5380695d9 100644 --- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc +++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc @@ -705,7 +705,7 @@ void Arm64JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister m } void Arm64JNIMacroAssembler::TryToTransitionFromRunnableToNative( - JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED) { + JNIMacroLabel* label, [[maybe_unused]] ArrayRef<const ManagedRegister> scratch_regs) { constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kArm64PointerSize>(); @@ -734,8 +734,8 @@ void Arm64JNIMacroAssembler::TryToTransitionFromRunnableToNative( void Arm64JNIMacroAssembler::TryToTransitionFromNativeToRunnable( JNIMacroLabel* label, - ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED, - ManagedRegister return_reg ATTRIBUTE_UNUSED) { + [[maybe_unused]] ArrayRef<const ManagedRegister> scratch_regs, + [[maybe_unused]] ManagedRegister return_reg) { constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kArm64PointerSize>(); diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc index b82f0dc4b4..1c04a3d20b 100644 --- a/compiler/utils/assembler.cc +++ b/compiler/utils/assembler.cc @@ -57,18 +57,21 @@ void AssemblerBuffer::ProcessFixups(const MemoryRegion& region) { fixup->Process(region, fixup->position()); fixup = fixup->previous(); } +#ifndef NDEBUG + fixups_processed_ = true; +#endif +} + + +void AssemblerBuffer::ProcessFixups() { + MemoryRegion from(reinterpret_cast<void*>(contents()), Size()); + ProcessFixups(from); } -void AssemblerBuffer::FinalizeInstructions(const MemoryRegion& instructions) { - // Copy the instructions from the buffer. 
+void AssemblerBuffer::CopyInstructions(const MemoryRegion& instructions) { MemoryRegion from(reinterpret_cast<void*>(contents()), Size()); instructions.CopyFrom(0, from); - // Process fixups in the instructions. - ProcessFixups(instructions); -#ifndef NDEBUG - fixups_processed_ = true; -#endif } diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h index 13a5d9fd01..f3fa711dbb 100644 --- a/compiler/utils/assembler.h +++ b/compiler/utils/assembler.h @@ -163,9 +163,8 @@ class AssemblerBuffer { uint8_t* contents() const { return contents_; } - // Copy the assembled instructions into the specified memory block - // and apply all fixups. - void FinalizeInstructions(const MemoryRegion& region); + // Copy the assembled instructions into the specified memory block. + void CopyInstructions(const MemoryRegion& region); // To emit an instruction to the assembler buffer, the EnsureCapacity helper // must be used to guarantee that the underlying data area is big enough to @@ -246,6 +245,8 @@ class AssemblerBuffer { // The provided `min_capacity` must be higher than current `Capacity()`. void ExtendCapacity(size_t min_capacity); + void ProcessFixups(); + private: // The limit is set to kMinimumGap bytes before the end of the data area. // This leaves enough space for the longest possible instruction and allows @@ -357,7 +358,10 @@ class DebugFrameOpCodeWriterForAssembler final class Assembler : public DeletableArenaObject<kArenaAllocAssembler> { public: // Finalize the code; emit slow paths, fixup branches, add literal pool, etc. - virtual void FinalizeCode() { buffer_.EmitSlowPaths(this); } + virtual void FinalizeCode() { + buffer_.EmitSlowPaths(this); + buffer_.ProcessFixups(); + } // Size of generated code virtual size_t CodeSize() const { return buffer_.Size(); } @@ -375,12 +379,12 @@ class Assembler : public DeletableArenaObject<kArenaAllocAssembler> { virtual size_t CodePosition() { return CodeSize(); } // Copy instructions out of assembly buffer into the given region of memory - virtual void FinalizeInstructions(const MemoryRegion& region) { - buffer_.FinalizeInstructions(region); + virtual void CopyInstructions(const MemoryRegion& region) { + buffer_.CopyInstructions(region); } // TODO: Implement with disassembler. - virtual void Comment(const char* format ATTRIBUTE_UNUSED, ...) {} + virtual void Comment([[maybe_unused]] const char* format, ...) 
{} virtual void Bind(Label* label) = 0; virtual void Jump(Label* label) = 0; diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index d03e5a7abc..72f48367a6 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -26,6 +26,7 @@ #include <fstream> #include <iterator> +#include "base/array_ref.h" #include "base/macros.h" #include "base/malloc_arena_pool.h" #include "assembler_test_base.h" @@ -200,8 +201,8 @@ class AssemblerTest : public AssemblerTestBase { template <typename Reg1, typename Reg2, typename ImmType> std::string RepeatTemplatedRegistersImmBits(void (Ass::*f)(Reg1, Reg2, ImmType), int imm_bits, - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), const std::string& fmt, @@ -215,48 +216,28 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2, new_imm * multiplier + bias); + (assembler_.get()->*f)(reg1, reg2, new_imm * multiplier + bias); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceImm(imm, bias, multiplier, &base); - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm * multiplier + bias; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } - - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename Reg1, typename Reg2, typename Reg3, typename ImmType> std::string RepeatTemplatedRegistersImmBits(void (Ass::*f)(Reg1, Reg2, Reg3, ImmType), int imm_bits, - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, - const std::vector<Reg3*> reg3_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + ArrayRef<const Reg3> reg3_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), std::string (AssemblerTest::*GetName3)(const Reg3&), @@ -271,53 +252,28 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2, *reg3, new_imm + bias); + (assembler_.get()->*f)(reg1, reg2, reg3, new_imm + bias); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } - - std::string reg3_string = (this->*GetName3)(*reg3); - size_t reg3_index; - while ((reg3_index = base.find(REG3_TOKEN)) != std::string::npos) { - base.replace(reg3_index, ConstexprStrLen(REG3_TOKEN), reg3_string); - } - - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm + bias; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceReg(REG3_TOKEN, (this->*GetName3)(reg3), &base); + ReplaceImm(imm, bias, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename ImmType, typename Reg1, typename Reg2> std::string RepeatTemplatedImmBitsRegisters(void (Ass::*f)(ImmType, Reg1, Reg2), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), int imm_bits, @@ -332,46 +288,26 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(new_imm, *reg1, *reg2); + (assembler_.get()->*f)(new_imm, reg1, reg2); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } - - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } } - // Add a newline at the end. - str += "\n"; return str; } template <typename RegType, typename ImmType> std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType), int imm_bits, - const std::vector<RegType*> registers, + ArrayRef<const RegType> registers, std::string (AssemblerTest::*GetName)(const RegType&), const std::string& fmt, int bias) { @@ -382,36 +318,148 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { ImmType new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg, new_imm + bias); + (assembler_.get()->*f)(reg, new_imm + bias); } std::string base = fmt; - std::string reg_string = (this->*GetName)(*reg); - size_t reg_index; - while ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } + ReplaceReg(REG_TOKEN, (this->*GetName)(reg), &base); + ReplaceImm(imm, bias, /*multiplier=*/ 1, &base); - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm + bias; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + str += base; + str += "\n"; + } + } + return str; + } + + template <typename RegType, typename ImmType> + std::string RepeatTemplatedRegisterImmBitsShift( + void (Ass::*f)(RegType, ImmType), + int imm_bits, + int shift, + ArrayRef<const RegType> registers, + std::string (AssemblerTest::*GetName)(const RegType&), + const std::string& fmt, + int bias) { + std::string str; + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0), shift); + + for (auto reg : registers) { + for (int64_t imm : imms) { + ImmType new_imm = CreateImmediate(imm); + if (f != nullptr) { + (assembler_.get()->*f)(reg, new_imm + bias); } + std::string base = fmt; + + 
ReplaceReg(REG_TOKEN, (this->*GetName)(reg), &base); + ReplaceImm(imm, bias, /*multiplier=*/ 1, &base); + + str += base; + str += "\n"; + } + } + return str; + } + + template <typename ImmType> + std::string RepeatTemplatedImmBitsShift( + void (Ass::*f)(ImmType), int imm_bits, int shift, const std::string& fmt, int bias = 0) { + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0), shift); + + WarnOnCombinations(imms.size()); + + std::string str; - if (str.size() > 0) { + for (int64_t imm : imms) { + ImmType new_imm = CreateImmediate(imm); + if (f != nullptr) { + (assembler_.get()->*f)(new_imm + bias); + } + std::string base = fmt; + + ReplaceImm(imm, bias, /*multiplier=*/ 1, &base); + + str += base; + str += "\n"; + } + return str; + } + + template <typename Reg1, typename Reg2, typename ImmType> + std::string RepeatTemplatedRegistersImmBitsShift( + void (Ass::*f)(Reg1, Reg2, ImmType), + int imm_bits, + int shift, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), + const std::string& fmt, + int bias = 0, + int multiplier = 1) { + std::string str; + std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0), shift); + + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (int64_t imm : imms) { + ImmType new_imm = CreateImmediate(imm); + if (f != nullptr) { + (assembler_.get()->*f)(reg1, reg2, new_imm * multiplier + bias); + } + std::string base = fmt; + + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceImm(imm, bias, multiplier, &base); + + str += base; str += "\n"; } - str += base; } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename ImmType> + std::string RepeatIbS( + void (Ass::*f)(ImmType), int imm_bits, int shift, const std::string& fmt, int bias = 0) { + return RepeatTemplatedImmBitsShift<ImmType>(f, imm_bits, shift, fmt, bias); + } + + template <typename ImmType> + std::string RepeatRIbS( + void (Ass::*f)(Reg, ImmType), int imm_bits, int shift, const std::string& fmt, int bias = 0) { + return RepeatTemplatedRegisterImmBitsShift<Reg, ImmType>( + f, + imm_bits, + shift, + GetRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt, + bias); + } + + template <typename ImmType> + std::string RepeatRRIbS(void (Ass::*f)(Reg, Reg, ImmType), + int imm_bits, + int shift, + const std::string& fmt, + int bias = 0) { + return RepeatTemplatedRegistersImmBitsShift<Reg, Reg, ImmType>( + f, + imm_bits, + shift, + GetRegisters(), + GetRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + fmt, + bias); + } + + template <typename ImmType> std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), int imm_bits, const std::string& fmt, @@ -488,6 +536,19 @@ class AssemblerTest : public AssemblerTestBase { fmt); } + std::string RepeatFFFF(void (Ass::*f)(FPReg, FPReg, FPReg, FPReg), const std::string& fmt) { + return RepeatTemplatedRegisters<FPReg, FPReg, FPReg, FPReg>(f, + GetFPRegisters(), + GetFPRegisters(), + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + fmt); + } + std::string RepeatFFR(void (Ass::*f)(FPReg, FPReg, Reg), const std::string& fmt) { return RepeatTemplatedRegisters<FPReg, FPReg, Reg>( f, @@ -538,6 +599,32 @@ class AssemblerTest : public AssemblerTestBase { fmt); } + std::string RepeatRFF(void (Ass::*f)(Reg, FPReg, FPReg), const std::string& fmt) { + return RepeatTemplatedRegisters<Reg, FPReg, FPReg>( + f, + GetRegisters(), + GetFPRegisters(), + GetFPRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetFPRegName, + &AssemblerTest::GetFPRegName, + fmt); + } + + template <typename ImmType> + std::string RepeatRFIb(void (Ass::*f)(Reg, FPReg, ImmType), + int imm_bits, + const std::string& fmt) { + return RepeatTemplatedRegistersImmBits<Reg, FPReg, ImmType>( + f, + imm_bits, + GetRegisters(), + GetFPRegisters(), + &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>, + &AssemblerTest::GetFPRegName, + fmt); + } + std::string RepeatFR(void (Ass::*f)(FPReg, Reg), const std::string& fmt) { return RepeatTemplatedRegisters<FPReg, Reg>(f, GetFPRegisters(), @@ -590,21 +677,11 @@ class AssemblerTest : public AssemblerTestBase { } std::string base = fmt; - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } - // Add a newline at the end. - str += "\n"; return str; } @@ -710,36 +787,36 @@ class AssemblerTest : public AssemblerTestBase { // Returns a vector of registers used by any of the repeat methods // involving an "R" (e.g. RepeatR). 
- virtual std::vector<Reg*> GetRegisters() = 0; + virtual ArrayRef<const Reg> GetRegisters() = 0; // Returns a vector of fp-registers used by any of the repeat methods // involving an "F" (e.g. RepeatFF). - virtual std::vector<FPReg*> GetFPRegisters() { + virtual ArrayRef<const FPReg> GetFPRegisters() { UNIMPLEMENTED(FATAL) << "Architecture does not support floating-point registers"; UNREACHABLE(); } // Returns a vector of dedicated simd-registers used by any of the repeat // methods involving an "V" (e.g. RepeatVV). - virtual std::vector<VecReg*> GetVectorRegisters() { + virtual ArrayRef<const VecReg> GetVectorRegisters() { UNIMPLEMENTED(FATAL) << "Architecture does not support vector registers"; UNREACHABLE(); } // Secondary register names are the secondary view on registers, e.g., 32b on 64b systems. - virtual std::string GetSecondaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + virtual std::string GetSecondaryRegisterName([[maybe_unused]] const Reg& reg) { UNIMPLEMENTED(FATAL) << "Architecture does not support secondary registers"; UNREACHABLE(); } // Tertiary register names are the tertiary view on registers, e.g., 16b on 64b systems. - virtual std::string GetTertiaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + virtual std::string GetTertiaryRegisterName([[maybe_unused]] const Reg& reg) { UNIMPLEMENTED(FATAL) << "Architecture does not support tertiary registers"; UNREACHABLE(); } // Quaternary register names are the quaternary view on registers, e.g., 8b on 64b systems. - virtual std::string GetQuaternaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) { + virtual std::string GetQuaternaryRegisterName([[maybe_unused]] const Reg& reg) { UNIMPLEMENTED(FATAL) << "Architecture does not support quaternary registers"; UNREACHABLE(); } @@ -818,7 +895,9 @@ class AssemblerTest : public AssemblerTestBase { const int kMaxBitsExhaustiveTest = 8; // Create a couple of immediate values up to the number of bits given. 
- virtual std::vector<int64_t> CreateImmediateValuesBits(const int imm_bits, bool as_uint = false) { + virtual std::vector<int64_t> CreateImmediateValuesBits(const int imm_bits, + bool as_uint = false, + int shift = 0) { CHECK_GT(imm_bits, 0); CHECK_LE(imm_bits, 64); std::vector<int64_t> res; @@ -826,11 +905,11 @@ class AssemblerTest : public AssemblerTestBase { if (imm_bits <= kMaxBitsExhaustiveTest) { if (as_uint) { for (uint64_t i = MinInt<uint64_t>(imm_bits); i <= MaxInt<uint64_t>(imm_bits); i++) { - res.push_back(static_cast<int64_t>(i)); + res.push_back(static_cast<int64_t>(i << shift)); } } else { for (int64_t i = MinInt<int64_t>(imm_bits); i <= MaxInt<int64_t>(imm_bits); i++) { - res.push_back(i); + res.push_back(i << shift); } } } else { @@ -838,14 +917,14 @@ class AssemblerTest : public AssemblerTestBase { for (uint64_t i = MinInt<uint64_t>(kMaxBitsExhaustiveTest); i <= MaxInt<uint64_t>(kMaxBitsExhaustiveTest); i++) { - res.push_back(static_cast<int64_t>(i)); + res.push_back(static_cast<int64_t>(i << shift)); } for (int i = 0; i <= imm_bits; i++) { uint64_t j = (MaxInt<uint64_t>(kMaxBitsExhaustiveTest) + 1) + ((MaxInt<uint64_t>(imm_bits) - (MaxInt<uint64_t>(kMaxBitsExhaustiveTest) + 1)) * i / imm_bits); - res.push_back(static_cast<int64_t>(j)); + res.push_back(static_cast<int64_t>(j << shift)); } } else { for (int i = 0; i <= imm_bits; i++) { @@ -853,18 +932,18 @@ class AssemblerTest : public AssemblerTestBase { ((((MinInt<int64_t>(kMaxBitsExhaustiveTest) - 1) - MinInt<int64_t>(imm_bits)) * i) / imm_bits); - res.push_back(static_cast<int64_t>(j)); + res.push_back(static_cast<int64_t>(j << shift)); } for (int64_t i = MinInt<int64_t>(kMaxBitsExhaustiveTest); i <= MaxInt<int64_t>(kMaxBitsExhaustiveTest); i++) { - res.push_back(static_cast<int64_t>(i)); + res.push_back(static_cast<int64_t>(i << shift)); } for (int i = 0; i <= imm_bits; i++) { int64_t j = (MaxInt<int64_t>(kMaxBitsExhaustiveTest) + 1) + ((MaxInt<int64_t>(imm_bits) - (MaxInt<int64_t>(kMaxBitsExhaustiveTest) + 1)) * i / imm_bits); - res.push_back(static_cast<int64_t>(j)); + res.push_back(static_cast<int64_t>(j << shift)); } } } @@ -1111,19 +1190,11 @@ class AssemblerTest : public AssemblerTestBase { } std::string base = fmt; - std::string addr_string = (this->*GetAName)(addr); - size_t addr_index; - if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) { - base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string); - } + ReplaceAddr((this->*GetAName)(addr), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } - // Add a newline at the end. - str += "\n"; return str; } @@ -1144,34 +1215,19 @@ class AssemblerTest : public AssemblerTestBase { } std::string base = fmt; - std::string addr_string = (this->*GetAName)(addr); - size_t addr_index; - if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) { - base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string); - } - - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceAddr((this->*GetAName)(addr), &base); + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename RegType, typename AddrType> std::string RepeatTemplatedRegMem(void (Ass::*f)(RegType, const AddrType&), - const std::vector<RegType*> registers, + ArrayRef<const RegType> registers, const std::vector<AddrType> addresses, std::string (AssemblerTest::*GetRName)(const RegType&), std::string (AssemblerTest::*GetAName)(const AddrType&), @@ -1181,37 +1237,24 @@ class AssemblerTest : public AssemblerTestBase { for (auto reg : registers) { for (auto addr : addresses) { if (f != nullptr) { - (assembler_.get()->*f)(*reg, addr); + (assembler_.get()->*f)(reg, addr); } std::string base = fmt; - std::string reg_string = (this->*GetRName)(*reg); - size_t reg_index; - if ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } - - std::string addr_string = (this->*GetAName)(addr); - size_t addr_index; - if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) { - base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string); - } + ReplaceReg(REG_TOKEN, (this->*GetRName)(reg), &base); + ReplaceAddr((this->*GetAName)(addr), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. - str += "\n"; return str; } template <typename AddrType, typename RegType> std::string RepeatTemplatedMemReg(void (Ass::*f)(const AddrType&, RegType), const std::vector<AddrType> addresses, - const std::vector<RegType*> registers, + ArrayRef<const RegType> registers, std::string (AssemblerTest::*GetAName)(const AddrType&), std::string (AssemblerTest::*GetRName)(const RegType&), const std::string& fmt) { @@ -1220,30 +1263,17 @@ class AssemblerTest : public AssemblerTestBase { for (auto addr : addresses) { for (auto reg : registers) { if (f != nullptr) { - (assembler_.get()->*f)(addr, *reg); + (assembler_.get()->*f)(addr, reg); } std::string base = fmt; - std::string addr_string = (this->*GetAName)(addr); - size_t addr_index; - if ((addr_index = base.find(ADDRESS_TOKEN)) != std::string::npos) { - base.replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), addr_string); - } + ReplaceAddr((this->*GetAName)(addr), &base); + ReplaceReg(REG_TOKEN, (this->*GetRName)(reg), &base); - std::string reg_string = (this->*GetRName)(*reg); - size_t reg_index; - if ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } - - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. - str += "\n"; return str; } @@ -1253,36 +1283,28 @@ class AssemblerTest : public AssemblerTestBase { template <typename RegType> std::string RepeatTemplatedRegister(void (Ass::*f)(RegType), - const std::vector<RegType*> registers, + ArrayRef<const RegType> registers, std::string (AssemblerTest::*GetName)(const RegType&), const std::string& fmt) { std::string str; for (auto reg : registers) { if (f != nullptr) { - (assembler_.get()->*f)(*reg); + (assembler_.get()->*f)(reg); } std::string base = fmt; - std::string reg_string = (this->*GetName)(*reg); - size_t reg_index; - if ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } + ReplaceReg(REG_TOKEN, (this->*GetName)(reg), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename Reg1, typename Reg2> std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), const std::string& fmt, @@ -1294,44 +1316,31 @@ class AssemblerTest : public AssemblerTestBase { for (auto reg2 : reg2_registers) { // Check if this register pair is on the exception list. If so, skip it. if (except != nullptr) { - const auto& pair = std::make_pair(*reg1, *reg2); + const auto& pair = std::make_pair(reg1, reg2); if (std::find(except->begin(), except->end(), pair) != except->end()) { continue; } } if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2); + (assembler_.get()->*f)(reg1, reg2); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. - str += "\n"; return str; } template <typename Reg1, typename Reg2> std::string RepeatTemplatedRegistersNoDupes(void (Ass::*f)(Reg1, Reg2), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), const std::string& fmt) { @@ -1342,38 +1351,25 @@ class AssemblerTest : public AssemblerTestBase { for (auto reg2 : reg2_registers) { if (reg1 == reg2) continue; if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2); + (assembler_.get()->*f)(reg1, reg2); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename Reg1, typename Reg2, typename Reg3> std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2, Reg3), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, - const std::vector<Reg3*> reg3_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + ArrayRef<const Reg3> reg3_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), std::string (AssemblerTest::*GetName3)(const Reg3&), @@ -1383,44 +1379,61 @@ class AssemblerTest : public AssemblerTestBase { for (auto reg2 : reg2_registers) { for (auto reg3 : reg3_registers) { if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2, *reg3); + (assembler_.get()->*f)(reg1, reg2, reg3); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceReg(REG3_TOKEN, (this->*GetName3)(reg3), &base); - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } + str += base; + str += "\n"; + } + } + } + return str; + } - std::string reg3_string = (this->*GetName3)(*reg3); - size_t reg3_index; - while ((reg3_index = base.find(REG3_TOKEN)) != std::string::npos) { - base.replace(reg3_index, ConstexprStrLen(REG3_TOKEN), reg3_string); - } + template <typename Reg1, typename Reg2, typename Reg3, typename Reg4> + std::string RepeatTemplatedRegisters(void (Ass::*f)(Reg1, Reg2, Reg3, Reg4), + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + ArrayRef<const Reg3> reg3_registers, + ArrayRef<const Reg4> reg4_registers, + std::string (AssemblerTest::*GetName1)(const Reg1&), + std::string (AssemblerTest::*GetName2)(const Reg2&), + std::string (AssemblerTest::*GetName3)(const Reg3&), + std::string (AssemblerTest::*GetName4)(const Reg4&), + const std::string& fmt) { + std::string str; + for (auto reg1 : reg1_registers) { + for (auto reg2 : reg2_registers) { + for (auto reg3 : reg3_registers) { + for (auto reg4 : reg4_registers) { + if (f != nullptr) { + (assembler_.get()->*f)(reg1, reg2, reg3, reg4); + } + std::string base = fmt; - if (str.size() > 0) { + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceReg(REG3_TOKEN, (this->*GetName3)(reg3), &base); + ReplaceReg(REG4_TOKEN, (this->*GetName4)(reg4), &base); + + str += base; str += "\n"; } - str += base; } } } - // Add a newline at the end. 
- str += "\n"; return str; } template <typename Reg1, typename Reg2> std::string RepeatTemplatedRegistersImm(void (Ass::*f)(Reg1, Reg2, const Imm&), - const std::vector<Reg1*> reg1_registers, - const std::vector<Reg2*> reg2_registers, + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, std::string (AssemblerTest::*GetName1)(const Reg1&), std::string (AssemblerTest::*GetName2)(const Reg2&), size_t imm_bytes, @@ -1434,39 +1447,19 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { Imm new_imm = CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg1, *reg2, new_imm); + (assembler_.get()->*f)(reg1, reg2, new_imm); } std::string base = fmt; - std::string reg1_string = (this->*GetName1)(*reg1); - size_t reg1_index; - while ((reg1_index = base.find(REG1_TOKEN)) != std::string::npos) { - base.replace(reg1_index, ConstexprStrLen(REG1_TOKEN), reg1_string); - } - - std::string reg2_string = (this->*GetName2)(*reg2); - size_t reg2_index; - while ((reg2_index = base.find(REG2_TOKEN)) != std::string::npos) { - base.replace(reg2_index, ConstexprStrLen(REG2_TOKEN), reg2_string); - } - - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } } - // Add a newline at the end. - str += "\n"; return str; } @@ -1517,11 +1510,41 @@ class AssemblerTest : public AssemblerTestBase { } } + static void ReplaceReg(const std::string& reg_token, + const std::string& replacement, + /*inout*/ std::string* str) { + size_t reg_index; + while ((reg_index = str->find(reg_token)) != std::string::npos) { + str->replace(reg_index, reg_token.length(), replacement); + } + } + + static void ReplaceImm(int64_t imm, + int64_t bias, + int64_t multiplier, + /*inout*/ std::string* str) { + size_t imm_index = str->find(IMM_TOKEN); + if (imm_index != std::string::npos) { + std::ostringstream sreg; + sreg << imm * multiplier + bias; + std::string imm_string = sreg.str(); + str->replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); + } + } + + static void ReplaceAddr(const std::string& replacement, /*inout*/ std::string* str) { + size_t addr_index; + if ((addr_index = str->find(ADDRESS_TOKEN)) != std::string::npos) { + str->replace(addr_index, ConstexprStrLen(ADDRESS_TOKEN), replacement); + } + } + static constexpr const char* ADDRESS_TOKEN = "{mem}"; static constexpr const char* REG_TOKEN = "{reg}"; static constexpr const char* REG1_TOKEN = "{reg1}"; static constexpr const char* REG2_TOKEN = "{reg2}"; static constexpr const char* REG3_TOKEN = "{reg3}"; + static constexpr const char* REG4_TOKEN = "{reg4}"; static constexpr const char* IMM_TOKEN = "{imm}"; private: @@ -1529,7 +1552,7 @@ class AssemblerTest : public AssemblerTestBase { std::string RepeatRegisterImm(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) { - const std::vector<Reg*> registers = GetRegisters(); + ArrayRef<const Reg> registers = GetRegisters(); std::string str; std::vector<int64_t> imms = CreateImmediateValues(imm_bytes); @@ -1539,45 +1562,29 @@ class AssemblerTest : public AssemblerTestBase { for (int64_t imm : imms) { Imm new_imm = 
CreateImmediate(imm); if (f != nullptr) { - (assembler_.get()->*f)(*reg, new_imm); + (assembler_.get()->*f)(reg, new_imm); } std::string base = fmt; - std::string reg_string = GetRegName<kRegView>(*reg); - size_t reg_index; - while ((reg_index = base.find(REG_TOKEN)) != std::string::npos) { - base.replace(reg_index, ConstexprStrLen(REG_TOKEN), reg_string); - } + ReplaceReg(REG_TOKEN, GetRegName<kRegView>(reg), &base); + ReplaceImm(imm, /*bias=*/ 0, /*multiplier=*/ 1, &base); - size_t imm_index = base.find(IMM_TOKEN); - if (imm_index != std::string::npos) { - std::ostringstream sreg; - sreg << imm; - std::string imm_string = sreg.str(); - base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string); - } - - if (str.size() > 0) { - str += "\n"; - } str += base; + str += "\n"; } } - // Add a newline at the end. - str += "\n"; return str; } // Override this to pad the code with NOPs to a certain size if needed. - virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) { - } + virtual void Pad([[maybe_unused]] std::vector<uint8_t>& data) {} void DriverWrapper(const std::string& assembly_text, const std::string& test_name) { assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); - assembler_->FinalizeInstructions(code); + assembler_->CopyInstructions(code); Pad(*data); Driver(*data, assembly_text, test_name); } diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h index 73f3657413..6f836d3718 100644 --- a/compiler/utils/assembler_test_base.h +++ b/compiler/utils/assembler_test_base.h @@ -141,6 +141,16 @@ class AssemblerTestBase : public testing::Test { virtual std::vector<std::string> GetAssemblerCommand() { InstructionSet isa = GetIsa(); switch (isa) { + case InstructionSet::kRiscv64: + // TODO(riscv64): Support compression (RV32C) in assembler and tests (add `c` to `-march=`). + return {FindTool("clang"), + "--compile", + "-target", + "riscv64-linux-gnu", + "-march=rv64imafd_zba_zbb", + // Force the assembler to fully emit branch instructions instead of leaving + // offsets unresolved with relocation information for the linker. + "-mno-relax"}; case InstructionSet::kX86: return {FindTool("clang"), "--compile", "-target", "i386-linux-gnu"}; case InstructionSet::kX86_64: @@ -159,6 +169,15 @@ class AssemblerTestBase : public testing::Test { "--no-print-imm-hex", "--triple", "thumbv7a-linux-gnueabi"}; + case InstructionSet::kRiscv64: + return {FindTool("llvm-objdump"), + "--disassemble", + "--no-print-imm-hex", + "--no-show-raw-insn", + // Disassemble Standard Extensions supported by the assembler. 
+ "--mattr=+F,+D,+A,+Zba,+Zbb", + "-M", + "no-aliases"}; default: return { FindTool("llvm-objdump"), "--disassemble", "--no-print-imm-hex", "--no-show-raw-insn"}; diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc index 672cd3d10f..53cb3d6f8e 100644 --- a/compiler/utils/assembler_thumb_test.cc +++ b/compiler/utils/assembler_thumb_test.cc @@ -79,7 +79,7 @@ class ArmVIXLAssemblerTest : public AssemblerTestBase { size_t cs = __ CodeSize(); std::vector<uint8_t> managed_code(cs); MemoryRegion code(&managed_code[0], managed_code.size()); - __ FinalizeInstructions(code); + __ CopyInstructions(code); DumpAndCheck(managed_code, testname, expected); } diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc index 8b47b38e63..dc7ec60032 100644 --- a/compiler/utils/jni_macro_assembler.cc +++ b/compiler/utils/jni_macro_assembler.cc @@ -25,6 +25,9 @@ #ifdef ART_ENABLE_CODEGEN_arm64 #include "arm64/jni_macro_assembler_arm64.h" #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 +#include "riscv64/jni_macro_assembler_riscv64.h" +#endif #ifdef ART_ENABLE_CODEGEN_x86 #include "x86/jni_macro_assembler_x86.h" #endif @@ -34,6 +37,7 @@ #include "base/casts.h" #include "base/globals.h" #include "base/memory_region.h" +#include "gc_root.h" namespace art HIDDEN { @@ -79,6 +83,10 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create( case InstructionSet::kArm64: return MacroAsm64UniquePtr(new (allocator) arm64::Arm64JNIMacroAssembler(allocator)); #endif +#ifdef ART_ENABLE_CODEGEN_riscv64 + case InstructionSet::kRiscv64: + return MacroAsm64UniquePtr(new (allocator) riscv64::Riscv64JNIMacroAssembler(allocator)); +#endif #ifdef ART_ENABLE_CODEGEN_x86_64 case InstructionSet::kX86_64: return MacroAsm64UniquePtr(new (allocator) x86_64::X86_64JNIMacroAssembler(allocator)); @@ -90,4 +98,21 @@ MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create( } } +template <PointerSize kPointerSize> +void JNIMacroAssembler<kPointerSize>::LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs) { + static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>)); + Load(dest, base, offs, sizeof(uint32_t)); +} + +template +void JNIMacroAssembler<PointerSize::k32>::LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs); +template +void JNIMacroAssembler<PointerSize::k64>::LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs); + } // namespace art diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h index 0c729705dc..2d51439ee8 100644 --- a/compiler/utils/jni_macro_assembler.h +++ b/compiler/utils/jni_macro_assembler.h @@ -92,7 +92,7 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> { virtual size_t CodeSize() const = 0; // Copy instructions out of assembly buffer into the given region of memory - virtual void FinalizeInstructions(const MemoryRegion& region) = 0; + virtual void CopyInstructions(const MemoryRegion& region) = 0; // Emit code that will create an activation on the stack virtual void BuildFrame(size_t frame_size, @@ -129,9 +129,14 @@ class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> { // Load routines virtual void Load(ManagedRegister dest, FrameOffset src, size_t size) = 0; virtual void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) = 0; - virtual void LoadRawPtrFromThread(ManagedRegister dest, 
ThreadOffset<kPointerSize> offs) = 0; + // Load reference from a `GcRoot<>`. The default is to load as `jint`. Some architectures + // (say, RISC-V) override this to provide a different sign- or zero-extension. + virtual void LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs); + // Copying routines // Move arguments from `srcs` locations to `dests` locations. @@ -266,8 +271,8 @@ class JNIMacroAssemblerFwd : public JNIMacroAssembler<kPointerSize> { return asm_.CodeSize(); } - void FinalizeInstructions(const MemoryRegion& region) override { - asm_.FinalizeInstructions(region); + void CopyInstructions(const MemoryRegion& region) override { + asm_.CopyInstructions(region); } DebugFrameOpCodeWriterForAssembler& cfi() override { diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h index ac8e7d3010..ff182e6146 100644 --- a/compiler/utils/jni_macro_assembler_test.h +++ b/compiler/utils/jni_macro_assembler_test.h @@ -77,15 +77,14 @@ class JNIMacroAssemblerTest : public AssemblerTestBase { private: // Override this to pad the code with NOPs to a certain size if needed. - virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) { - } + virtual void Pad([[maybe_unused]] std::vector<uint8_t>& data) {} void DriverWrapper(const std::string& assembly_text, const std::string& test_name) { assembler_->FinalizeCode(); size_t cs = assembler_->CodeSize(); std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs)); MemoryRegion code(&(*data)[0], data->size()); - assembler_->FinalizeInstructions(code); + assembler_->CopyInstructions(code); Pad(*data); Driver(*data, assembly_text, test_name); } diff --git a/compiler/utils/label.h b/compiler/utils/label.h index 0368d90a26..25bf01376b 100644 --- a/compiler/utils/label.h +++ b/compiler/utils/label.h @@ -31,6 +31,10 @@ class AssemblerFixup; namespace arm64 { class Arm64Assembler; } // namespace arm64 +namespace riscv64 { +class Riscv64Assembler; +class Riscv64Label; +} // namespace riscv64 namespace x86 { class X86Assembler; class NearLabel; @@ -109,6 +113,8 @@ class Label { } friend class arm64::Arm64Assembler; + friend class riscv64::Riscv64Assembler; + friend class riscv64::Riscv64Label; friend class x86::X86Assembler; friend class x86::NearLabel; friend class x86_64::X86_64Assembler; diff --git a/compiler/utils/riscv64/assembler_riscv64.cc b/compiler/utils/riscv64/assembler_riscv64.cc new file mode 100644 index 0000000000..089bc5dfe6 --- /dev/null +++ b/compiler/utils/riscv64/assembler_riscv64.cc @@ -0,0 +1,2422 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "assembler_riscv64.h" + +#include "base/bit_utils.h" +#include "base/casts.h" +#include "base/logging.h" +#include "base/memory_region.h" + +namespace art HIDDEN { +namespace riscv64 { + +static_assert(static_cast<size_t>(kRiscv64PointerSize) == kRiscv64DoublewordSize, + "Unexpected Riscv64 pointer size."); +static_assert(kRiscv64PointerSize == PointerSize::k64, "Unexpected Riscv64 pointer size."); + +// Split 32-bit offset into an `imm20` for LUI/AUIPC and +// a signed 12-bit short offset for ADDI/JALR/etc. +ALWAYS_INLINE static inline std::pair<uint32_t, int32_t> SplitOffset(int32_t offset) { + // The highest 0x800 values are out of range. + DCHECK_LT(offset, 0x7ffff800); + // Round `offset` to nearest 4KiB offset because short offset has range [-0x800, 0x800). + int32_t near_offset = (offset + 0x800) & ~0xfff; + // Calculate the short offset. + int32_t short_offset = offset - near_offset; + DCHECK(IsInt<12>(short_offset)); + // Extract the `imm20`. + uint32_t imm20 = static_cast<uint32_t>(near_offset) >> 12; + // Return the result as a pair. + return std::make_pair(imm20, short_offset); +} + +ALWAYS_INLINE static inline int32_t ToInt12(uint32_t uint12) { + DCHECK(IsUint<12>(uint12)); + return static_cast<int32_t>(uint12 - ((uint12 & 0x800) << 1)); +} + +void Riscv64Assembler::FinalizeCode() { + CHECK(!finalized_); + Assembler::FinalizeCode(); + ReserveJumpTableSpace(); + EmitLiterals(); + PromoteBranches(); + EmitBranches(); + EmitJumpTables(); + PatchCFI(); + finalized_ = true; +} + +void Riscv64Assembler::Emit(uint32_t value) { + if (overwriting_) { + // Branches to labels are emitted into their placeholders here. + buffer_.Store<uint32_t>(overwrite_location_, value); + overwrite_location_ += sizeof(uint32_t); + } else { + // Other instructions are simply appended at the end here. 
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_); + buffer_.Emit<uint32_t>(value); + } +} + +/////////////////////////////// RV64 VARIANTS extension /////////////////////////////// + +//////////////////////////////// RV64 "I" Instructions //////////////////////////////// + +// LUI/AUIPC (RV32I, with sign-extension on RV64I), opcode = 0x17, 0x37 + +void Riscv64Assembler::Lui(XRegister rd, uint32_t imm20) { + EmitU(imm20, rd, 0x37); +} + +void Riscv64Assembler::Auipc(XRegister rd, uint32_t imm20) { + EmitU(imm20, rd, 0x17); +} + +// Jump instructions (RV32I), opcode = 0x67, 0x6f + +void Riscv64Assembler::Jal(XRegister rd, int32_t offset) { + EmitJ(offset, rd, 0x6F); +} + +void Riscv64Assembler::Jalr(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x0, rd, 0x67); +} + +// Branch instructions, opcode = 0x63 (subfunc from 0x0 ~ 0x7), 0x67, 0x6f + +void Riscv64Assembler::Beq(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x0, 0x63); +} + +void Riscv64Assembler::Bne(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x1, 0x63); +} + +void Riscv64Assembler::Blt(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x4, 0x63); +} + +void Riscv64Assembler::Bge(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x5, 0x63); +} + +void Riscv64Assembler::Bltu(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x6, 0x63); +} + +void Riscv64Assembler::Bgeu(XRegister rs1, XRegister rs2, int32_t offset) { + EmitB(offset, rs2, rs1, 0x7, 0x63); +} + +// Load instructions (RV32I+RV64I): opcode = 0x03, funct3 from 0x0 ~ 0x6 + +void Riscv64Assembler::Lb(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x0, rd, 0x03); +} + +void Riscv64Assembler::Lh(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x1, rd, 0x03); +} + +void Riscv64Assembler::Lw(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x2, rd, 0x03); +} + +void Riscv64Assembler::Ld(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x3, rd, 0x03); +} + +void Riscv64Assembler::Lbu(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x4, rd, 0x03); +} + +void Riscv64Assembler::Lhu(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x5, rd, 0x03); +} + +void Riscv64Assembler::Lwu(XRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x6, rd, 0x3); +} + +// Store instructions (RV32I+RV64I): opcode = 0x23, funct3 from 0x0 ~ 0x3 + +void Riscv64Assembler::Sb(XRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x0, 0x23); +} + +void Riscv64Assembler::Sh(XRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x1, 0x23); +} + +void Riscv64Assembler::Sw(XRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x2, 0x23); +} + +void Riscv64Assembler::Sd(XRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x3, 0x23); +} + +// IMM ALU instructions (RV32I): opcode = 0x13, funct3 from 0x0 ~ 0x7 + +void Riscv64Assembler::Addi(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x0, rd, 0x13); +} + +void Riscv64Assembler::Slti(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x2, rd, 0x13); +} + +void Riscv64Assembler::Sltiu(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x3, rd, 0x13); +} + +void Riscv64Assembler::Xori(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x4, 
rd, 0x13); +} + +void Riscv64Assembler::Ori(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x6, rd, 0x13); +} + +void Riscv64Assembler::Andi(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x7, rd, 0x13); +} + +// 0x1 Split: 0x0(6b) + imm12(6b) +void Riscv64Assembler::Slli(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 64u); + EmitI6(0x0, shamt, rs1, 0x1, rd, 0x13); +} + +// 0x5 Split: 0x0(6b) + imm12(6b) +void Riscv64Assembler::Srli(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 64u); + EmitI6(0x0, shamt, rs1, 0x5, rd, 0x13); +} + +// 0x5 Split: 0x10(6b) + imm12(6b) +void Riscv64Assembler::Srai(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 64u); + EmitI6(0x10, shamt, rs1, 0x5, rd, 0x13); +} + +// ALU instructions (RV32I): opcode = 0x33, funct3 from 0x0 ~ 0x7 + +void Riscv64Assembler::Add(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x0, rd, 0x33); +} + +void Riscv64Assembler::Sub(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x0, rd, 0x33); +} + +void Riscv64Assembler::Slt(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x02, rd, 0x33); +} + +void Riscv64Assembler::Sltu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x03, rd, 0x33); +} + +void Riscv64Assembler::Xor(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x04, rd, 0x33); +} + +void Riscv64Assembler::Or(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x06, rd, 0x33); +} + +void Riscv64Assembler::And(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x07, rd, 0x33); +} + +void Riscv64Assembler::Sll(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x01, rd, 0x33); +} + +void Riscv64Assembler::Srl(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x05, rd, 0x33); +} + +void Riscv64Assembler::Sra(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x05, rd, 0x33); +} + +// 32bit Imm ALU instructions (RV64I): opcode = 0x1b, funct3 from 0x0, 0x1, 0x5 + +void Riscv64Assembler::Addiw(XRegister rd, XRegister rs1, int32_t imm12) { + EmitI(imm12, rs1, 0x0, rd, 0x1b); +} + +void Riscv64Assembler::Slliw(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 32u); + EmitR(0x0, shamt, rs1, 0x1, rd, 0x1b); +} + +void Riscv64Assembler::Srliw(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 32u); + EmitR(0x0, shamt, rs1, 0x5, rd, 0x1b); +} + +void Riscv64Assembler::Sraiw(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 32u); + EmitR(0x20, shamt, rs1, 0x5, rd, 0x1b); +} + +// 32bit ALU instructions (RV64I): opcode = 0x3b, funct3 from 0x0 ~ 0x7 + +void Riscv64Assembler::Addw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x0, rd, 0x3b); +} + +void Riscv64Assembler::Subw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x0, rd, 0x3b); +} + +void Riscv64Assembler::Sllw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x1, rd, 0x3b); +} + +void Riscv64Assembler::Srlw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x0, rs2, rs1, 0x5, rd, 0x3b); +} + +void Riscv64Assembler::Sraw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x5, rd, 0x3b); +} + +// Environment call and breakpoint (RV32I), opcode = 
0x73 + +void Riscv64Assembler::Ecall() { EmitI(0x0, 0x0, 0x0, 0x0, 0x73); } + +void Riscv64Assembler::Ebreak() { EmitI(0x1, 0x0, 0x0, 0x0, 0x73); } + +// Fence instruction (RV32I): opcode = 0xf, funct3 = 0 + +void Riscv64Assembler::Fence(uint32_t pred, uint32_t succ) { + DCHECK(IsUint<4>(pred)); + DCHECK(IsUint<4>(succ)); + EmitI(/* normal fence */ 0x0 << 8 | pred << 4 | succ, 0x0, 0x0, 0x0, 0xf); +} + +void Riscv64Assembler::FenceTso() { + static constexpr uint32_t kPred = kFenceWrite | kFenceRead; + static constexpr uint32_t kSucc = kFenceWrite | kFenceRead; + EmitI(ToInt12(/* TSO fence */ 0x8 << 8 | kPred << 4 | kSucc), 0x0, 0x0, 0x0, 0xf); +} + +//////////////////////////////// RV64 "I" Instructions END //////////////////////////////// + +/////////////////////////// RV64 "Zifencei" Instructions START //////////////////////////// + +// "Zifencei" Standard Extension, opcode = 0xf, funct3 = 1 +void Riscv64Assembler::FenceI() { EmitI(0x0, 0x0, 0x1, 0x0, 0xf); } + +//////////////////////////// RV64 "Zifencei" Instructions END ///////////////////////////// + +/////////////////////////////// RV64 "M" Instructions START /////////////////////////////// + +// RV32M Standard Extension: opcode = 0x33, funct3 from 0x0 ~ 0x7 + +void Riscv64Assembler::Mul(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x0, rd, 0x33); +} + +void Riscv64Assembler::Mulh(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x1, rd, 0x33); +} + +void Riscv64Assembler::Mulhsu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x2, rd, 0x33); +} + +void Riscv64Assembler::Mulhu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x3, rd, 0x33); +} + +void Riscv64Assembler::Div(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x4, rd, 0x33); +} + +void Riscv64Assembler::Divu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x5, rd, 0x33); +} + +void Riscv64Assembler::Rem(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x6, rd, 0x33); +} + +void Riscv64Assembler::Remu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x7, rd, 0x33); +} + +// RV64M Standard Extension: opcode = 0x3b, funct3 0x0 and from 0x4 ~ 0x7 + +void Riscv64Assembler::Mulw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x0, rd, 0x3b); +} + +void Riscv64Assembler::Divw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x4, rd, 0x3b); +} + +void Riscv64Assembler::Divuw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x5, rd, 0x3b); +} + +void Riscv64Assembler::Remw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x6, rd, 0x3b); +} + +void Riscv64Assembler::Remuw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x1, rs2, rs1, 0x7, rd, 0x3b); +} + +//////////////////////////////// RV64 "M" Instructions END //////////////////////////////// + +/////////////////////////////// RV64 "A" Instructions START /////////////////////////////// + +void Riscv64Assembler::LrW(XRegister rd, XRegister rs1, AqRl aqrl) { + CHECK(aqrl != AqRl::kRelease); + EmitR4(0x2, enum_cast<uint32_t>(aqrl), 0x0, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::LrD(XRegister rd, XRegister rs1, AqRl aqrl) { + CHECK(aqrl != AqRl::kRelease); + EmitR4(0x2, enum_cast<uint32_t>(aqrl), 0x0, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::ScW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + CHECK(aqrl != AqRl::kAcquire); + EmitR4(0x3, 
enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::ScD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + CHECK(aqrl != AqRl::kAcquire); + EmitR4(0x3, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoSwapW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x1, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoSwapD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x1, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoAddW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x0, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoAddD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x0, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoXorW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x4, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoXorD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x4, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoAndW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0xc, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoAndD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0xc, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoOrW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x8, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoOrD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x8, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoMinW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x10, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoMinD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x10, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoMaxW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x14, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoMaxD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x14, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoMinuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x18, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoMinuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x18, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +void Riscv64Assembler::AmoMaxuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x1c, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x2, rd, 0x2f); +} + +void Riscv64Assembler::AmoMaxuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl) { + EmitR4(0x1c, enum_cast<uint32_t>(aqrl), rs2, rs1, 0x3, rd, 0x2f); +} + +/////////////////////////////// RV64 "A" Instructions END /////////////////////////////// + +///////////////////////////// RV64 "Zicsr" Instructions START ///////////////////////////// + +// "Zicsr" Standard Extension, opcode = 0x73, funct3 from 0x1 ~ 0x3 and 0x5 ~ 0x7 + +void Riscv64Assembler::Csrrw(XRegister rd, uint32_t csr, XRegister rs1) { + EmitI(ToInt12(csr), rs1, 0x1, rd, 0x73); +} + 
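For orientation, the `EmitI()` calls above pack their operands into the standard RISC-V I-type instruction word; the helper itself lives in the accompanying header rather than in this hunk, so the following is only a minimal standalone sketch of that layout (field widths from the RISC-V ISA spec, names illustrative, not part of the patch):

#include <cstdint>

// Sketch of the I-type layout assumed by `EmitI(imm12, rs1, funct3, rd, opcode)` calls above:
// imm[11:0] << 20 | rs1 << 15 | funct3 << 12 | rd << 7 | opcode
static uint32_t EncodeITypeSketch(int32_t imm12, uint32_t rs1, uint32_t funct3,
                                  uint32_t rd, uint32_t opcode) {
  return (static_cast<uint32_t>(imm12) & 0xfffu) << 20 |
         (rs1 & 0x1fu) << 15 |
         (funct3 & 0x7u) << 12 |
         (rd & 0x1fu) << 7 |
         (opcode & 0x7fu);
}
// E.g. `Csrrw(rd, csr, rs1)` above is `EmitI(ToInt12(csr), rs1, 0x1, rd, 0x73)`:
// the CSR number travels in the 12-bit immediate field of the I-type word.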
+void Riscv64Assembler::Csrrs(XRegister rd, uint32_t csr, XRegister rs1) { + EmitI(ToInt12(csr), rs1, 0x2, rd, 0x73); +} + +void Riscv64Assembler::Csrrc(XRegister rd, uint32_t csr, XRegister rs1) { + EmitI(ToInt12(csr), rs1, 0x3, rd, 0x73); +} + +void Riscv64Assembler::Csrrwi(XRegister rd, uint32_t csr, uint32_t uimm5) { + EmitI(ToInt12(csr), uimm5, 0x5, rd, 0x73); +} + +void Riscv64Assembler::Csrrsi(XRegister rd, uint32_t csr, uint32_t uimm5) { + EmitI(ToInt12(csr), uimm5, 0x6, rd, 0x73); +} + +void Riscv64Assembler::Csrrci(XRegister rd, uint32_t csr, uint32_t uimm5) { + EmitI(ToInt12(csr), uimm5, 0x7, rd, 0x73); +} + +////////////////////////////// RV64 "Zicsr" Instructions END ////////////////////////////// + +/////////////////////////////// RV64 "FD" Instructions START /////////////////////////////// + +// FP load/store instructions (RV32F+RV32D): opcode = 0x07, 0x27 + +void Riscv64Assembler::FLw(FRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x2, rd, 0x07); +} + +void Riscv64Assembler::FLd(FRegister rd, XRegister rs1, int32_t offset) { + EmitI(offset, rs1, 0x3, rd, 0x07); +} + +void Riscv64Assembler::FSw(FRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x2, 0x27); +} + +void Riscv64Assembler::FSd(FRegister rs2, XRegister rs1, int32_t offset) { + EmitS(offset, rs2, rs1, 0x3, 0x27); +} + +// FP FMA instructions (RV32F+RV32D): opcode = 0x43, 0x47, 0x4b, 0x4f + +void Riscv64Assembler::FMAddS( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x43); +} + +void Riscv64Assembler::FMAddD( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x43); +} + +void Riscv64Assembler::FMSubS( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x47); +} + +void Riscv64Assembler::FMSubD( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x47); +} + +void Riscv64Assembler::FNMSubS( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4b); +} + +void Riscv64Assembler::FNMSubD( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4b); +} + +void Riscv64Assembler::FNMAddS( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4f); +} + +void Riscv64Assembler::FNMAddD( + FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm) { + EmitR4(rs3, 0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x4f); +} + +// Simple FP instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b0XXXX0D + +void Riscv64Assembler::FAddS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x0, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FAddD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x1, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSubS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x4, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSubD(FRegister rd, FRegister rs1, FRegister 
rs2, FPRoundingMode frm) { + EmitR(0x5, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FMulS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x8, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FMulD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0x9, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FDivS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0xc, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FDivD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm) { + EmitR(0xd, rs2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSqrtS(FRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x2c, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSqrtD(FRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x2d, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FSgnjS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x10, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FSgnjD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x11, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FSgnjnS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x10, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FSgnjnD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x11, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FSgnjxS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x10, rs2, rs1, 0x2, rd, 0x53); +} + +void Riscv64Assembler::FSgnjxD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x11, rs2, rs1, 0x2, rd, 0x53); +} + +void Riscv64Assembler::FMinS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x14, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMinD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x15, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMaxS(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x14, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FMaxD(FRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x15, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FCvtSD(FRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x20, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDS(FRegister rd, FRegister rs1, FPRoundingMode frm) { + // Note: The `frm` is useless, the result can represent every value of the source exactly. 
+ EmitR(0x21, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +// FP compare instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b101000D + +void Riscv64Assembler::FEqS(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x50, rs2, rs1, 0x2, rd, 0x53); +} + +void Riscv64Assembler::FEqD(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x51, rs2, rs1, 0x2, rd, 0x53); +} + +void Riscv64Assembler::FLtS(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x50, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FLtD(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x51, rs2, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FLeS(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x50, rs2, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FLeD(XRegister rd, FRegister rs1, FRegister rs2) { + EmitR(0x51, rs2, rs1, 0x0, rd, 0x53); +} + +// FP conversion instructions (RV32F+RV32D+RV64F+RV64D): opcode = 0x53, funct7 = 0b110X00D + +void Riscv64Assembler::FCvtWS(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x60, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtWD(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x61, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtWuS(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x60, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtWuD(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x61, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtLS(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x60, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtLD(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x61, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtLuS(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x60, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtLuD(XRegister rd, FRegister rs1, FPRoundingMode frm) { + EmitR(0x61, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtSW(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x68, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDW(FRegister rd, XRegister rs1, FPRoundingMode frm) { + // Note: The `frm` is useless, the result can represent every value of the source exactly. + EmitR(0x69, 0x0, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtSWu(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x68, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDWu(FRegister rd, XRegister rs1, FPRoundingMode frm) { + // Note: The `frm` is useless, the result can represent every value of the source exactly. 
+ EmitR(0x69, 0x1, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtSL(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x68, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDL(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x69, 0x2, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtSLu(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x68, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +void Riscv64Assembler::FCvtDLu(FRegister rd, XRegister rs1, FPRoundingMode frm) { + EmitR(0x69, 0x3, rs1, enum_cast<uint32_t>(frm), rd, 0x53); +} + +// FP move instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x0, funct7 = 0b111X00D + +void Riscv64Assembler::FMvXW(XRegister rd, FRegister rs1) { + EmitR(0x70, 0x0, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMvXD(XRegister rd, FRegister rs1) { + EmitR(0x71, 0x0, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMvWX(FRegister rd, XRegister rs1) { + EmitR(0x78, 0x0, rs1, 0x0, rd, 0x53); +} + +void Riscv64Assembler::FMvDX(FRegister rd, XRegister rs1) { + EmitR(0x79, 0x0, rs1, 0x0, rd, 0x53); +} + +// FP classify instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x1, funct7 = 0b111X00D + +void Riscv64Assembler::FClassS(XRegister rd, FRegister rs1) { + EmitR(0x70, 0x0, rs1, 0x1, rd, 0x53); +} + +void Riscv64Assembler::FClassD(XRegister rd, FRegister rs1) { + EmitR(0x71, 0x0, rs1, 0x1, rd, 0x53); +} + +/////////////////////////////// RV64 "FD" Instructions END /////////////////////////////// + +////////////////////////////// RV64 "Zba" Instructions START ///////////////////////////// + +void Riscv64Assembler::AddUw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x4, rs2, rs1, 0x0, rd, 0x3b); +} + +void Riscv64Assembler::Sh1Add(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x2, rd, 0x33); +} + +void Riscv64Assembler::Sh1AddUw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x2, rd, 0x3b); +} + +void Riscv64Assembler::Sh2Add(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x4, rd, 0x33); +} + +void Riscv64Assembler::Sh2AddUw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x4, rd, 0x3b); +} + +void Riscv64Assembler::Sh3Add(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x6, rd, 0x33); +} + +void Riscv64Assembler::Sh3AddUw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x10, rs2, rs1, 0x6, rd, 0x3b); +} + +void Riscv64Assembler::SlliUw(XRegister rd, XRegister rs1, int32_t shamt) { + EmitI6(0x2, shamt, rs1, 0x1, rd, 0x1b); +} + +/////////////////////////////// RV64 "Zba" Instructions END ////////////////////////////// + +////////////////////////////// RV64 "Zbb" Instructions START ///////////////////////////// + +void Riscv64Assembler::Andn(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x7, rd, 0x33); +} + +void Riscv64Assembler::Orn(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x6, rd, 0x33); +} + +void Riscv64Assembler::Xnor(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x20, rs2, rs1, 0x4, rd, 0x33); +} + +void Riscv64Assembler::Clz(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x0, rs1, 0x1, rd, 0x13); +} + +void Riscv64Assembler::Clzw(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x0, rs1, 0x1, rd, 0x1b); +} + +void Riscv64Assembler::Ctz(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x1, rs1, 0x1, rd, 0x13); +} + +void Riscv64Assembler::Ctzw(XRegister 
rd, XRegister rs1) { + EmitR(0x30, 0x1, rs1, 0x1, rd, 0x1b); +} + +void Riscv64Assembler::Cpop(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x2, rs1, 0x1, rd, 0x13); +} + +void Riscv64Assembler::Cpopw(XRegister rd, XRegister rs1) { + EmitR(0x30, 0x2, rs1, 0x1, rd, 0x1b); +} + +void Riscv64Assembler::Min(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x5, rs2, rs1, 0x4, rd, 0x33); +} + +void Riscv64Assembler::Minu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x5, rs2, rs1, 0x5, rd, 0x33); +} + +void Riscv64Assembler::Max(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x5, rs2, rs1, 0x6, rd, 0x33); +} + +void Riscv64Assembler::Maxu(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x5, rs2, rs1, 0x7, rd, 0x33); +} + +void Riscv64Assembler::Rol(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x30, rs2, rs1, 0x1, rd, 0x33); +} + +void Riscv64Assembler::Rolw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x30, rs2, rs1, 0x1, rd, 0x3b); +} + +void Riscv64Assembler::Ror(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x30, rs2, rs1, 0x5, rd, 0x33); +} + +void Riscv64Assembler::Rorw(XRegister rd, XRegister rs1, XRegister rs2) { + EmitR(0x30, rs2, rs1, 0x5, rd, 0x3b); +} + +void Riscv64Assembler::Rori(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 64u); + EmitI6(0x18, shamt, rs1, 0x5, rd, 0x13); +} + +void Riscv64Assembler::Roriw(XRegister rd, XRegister rs1, int32_t shamt) { + CHECK_LT(static_cast<uint32_t>(shamt), 32u); + EmitI6(0x18, shamt, rs1, 0x5, rd, 0x1b); +} + +void Riscv64Assembler::OrcB(XRegister rd, XRegister rs1) { + EmitR(0x14, 0x7, rs1, 0x5, rd, 0x13); +} + +void Riscv64Assembler::Rev8(XRegister rd, XRegister rs1) { + EmitR(0x35, 0x18, rs1, 0x5, rd, 0x13); +} + +/////////////////////////////// RV64 "Zbb" Instructions END ////////////////////////////// + +////////////////////////////// RV64 MACRO Instructions START /////////////////////////////// + +// Pseudo instructions + +void Riscv64Assembler::Nop() { Addi(Zero, Zero, 0); } + +void Riscv64Assembler::Li(XRegister rd, int64_t imm) { + LoadImmediate(rd, imm, /*can_use_tmp=*/ false); +} + +void Riscv64Assembler::Mv(XRegister rd, XRegister rs) { Addi(rd, rs, 0); } + +void Riscv64Assembler::Not(XRegister rd, XRegister rs) { Xori(rd, rs, -1); } + +void Riscv64Assembler::Neg(XRegister rd, XRegister rs) { Sub(rd, Zero, rs); } + +void Riscv64Assembler::NegW(XRegister rd, XRegister rs) { Subw(rd, Zero, rs); } + +void Riscv64Assembler::SextB(XRegister rd, XRegister rs) { + Slli(rd, rs, kXlen - 8u); + Srai(rd, rd, kXlen - 8u); +} + +void Riscv64Assembler::SextH(XRegister rd, XRegister rs) { + Slli(rd, rs, kXlen - 16u); + Srai(rd, rd, kXlen - 16u); +} + +void Riscv64Assembler::SextW(XRegister rd, XRegister rs) { Addiw(rd, rs, 0); } + +void Riscv64Assembler::ZextB(XRegister rd, XRegister rs) { Andi(rd, rs, 0xff); } + +void Riscv64Assembler::ZextH(XRegister rd, XRegister rs) { + Slli(rd, rs, kXlen - 16u); + Srli(rd, rd, kXlen - 16u); +} + +void Riscv64Assembler::ZextW(XRegister rd, XRegister rs) { + // TODO(riscv64): Use the ZEXT.W alias for ADD.UW from the Zba extension. 
+ Slli(rd, rs, kXlen - 32u); + Srli(rd, rd, kXlen - 32u); +} + +void Riscv64Assembler::Seqz(XRegister rd, XRegister rs) { Sltiu(rd, rs, 1); } + +void Riscv64Assembler::Snez(XRegister rd, XRegister rs) { Sltu(rd, Zero, rs); } + +void Riscv64Assembler::Sltz(XRegister rd, XRegister rs) { Slt(rd, rs, Zero); } + +void Riscv64Assembler::Sgtz(XRegister rd, XRegister rs) { Slt(rd, Zero, rs); } + +void Riscv64Assembler::FMvS(FRegister rd, FRegister rs) { FSgnjS(rd, rs, rs); } + +void Riscv64Assembler::FAbsS(FRegister rd, FRegister rs) { FSgnjxS(rd, rs, rs); } + +void Riscv64Assembler::FNegS(FRegister rd, FRegister rs) { FSgnjnS(rd, rs, rs); } + +void Riscv64Assembler::FMvD(FRegister rd, FRegister rs) { FSgnjD(rd, rs, rs); } + +void Riscv64Assembler::FAbsD(FRegister rd, FRegister rs) { FSgnjxD(rd, rs, rs); } + +void Riscv64Assembler::FNegD(FRegister rd, FRegister rs) { FSgnjnD(rd, rs, rs); } + +void Riscv64Assembler::Beqz(XRegister rs, int32_t offset) { + Beq(rs, Zero, offset); +} + +void Riscv64Assembler::Bnez(XRegister rs, int32_t offset) { + Bne(rs, Zero, offset); +} + +void Riscv64Assembler::Blez(XRegister rt, int32_t offset) { + Bge(Zero, rt, offset); +} + +void Riscv64Assembler::Bgez(XRegister rt, int32_t offset) { + Bge(rt, Zero, offset); +} + +void Riscv64Assembler::Bltz(XRegister rt, int32_t offset) { + Blt(rt, Zero, offset); +} + +void Riscv64Assembler::Bgtz(XRegister rt, int32_t offset) { + Blt(Zero, rt, offset); +} + +void Riscv64Assembler::Bgt(XRegister rs, XRegister rt, int32_t offset) { + Blt(rt, rs, offset); +} + +void Riscv64Assembler::Ble(XRegister rs, XRegister rt, int32_t offset) { + Bge(rt, rs, offset); +} + +void Riscv64Assembler::Bgtu(XRegister rs, XRegister rt, int32_t offset) { + Bltu(rt, rs, offset); +} + +void Riscv64Assembler::Bleu(XRegister rs, XRegister rt, int32_t offset) { + Bgeu(rt, rs, offset); +} + +void Riscv64Assembler::J(int32_t offset) { Jal(Zero, offset); } + +void Riscv64Assembler::Jal(int32_t offset) { Jal(RA, offset); } + +void Riscv64Assembler::Jr(XRegister rs) { Jalr(Zero, rs, 0); } + +void Riscv64Assembler::Jalr(XRegister rs) { Jalr(RA, rs, 0); } + +void Riscv64Assembler::Jalr(XRegister rd, XRegister rs) { Jalr(rd, rs, 0); } + +void Riscv64Assembler::Ret() { Jalr(Zero, RA, 0); } + +void Riscv64Assembler::RdCycle(XRegister rd) { + Csrrs(rd, 0xc00, Zero); +} + +void Riscv64Assembler::RdTime(XRegister rd) { + Csrrs(rd, 0xc01, Zero); +} + +void Riscv64Assembler::RdInstret(XRegister rd) { + Csrrs(rd, 0xc02, Zero); +} + +void Riscv64Assembler::Csrr(XRegister rd, uint32_t csr) { + Csrrs(rd, csr, Zero); +} + +void Riscv64Assembler::Csrw(uint32_t csr, XRegister rs) { + Csrrw(Zero, csr, rs); +} + +void Riscv64Assembler::Csrs(uint32_t csr, XRegister rs) { + Csrrs(Zero, csr, rs); +} + +void Riscv64Assembler::Csrc(uint32_t csr, XRegister rs) { + Csrrc(Zero, csr, rs); +} + +void Riscv64Assembler::Csrwi(uint32_t csr, uint32_t uimm5) { + Csrrwi(Zero, csr, uimm5); +} + +void Riscv64Assembler::Csrsi(uint32_t csr, uint32_t uimm5) { + Csrrsi(Zero, csr, uimm5); +} + +void Riscv64Assembler::Csrci(uint32_t csr, uint32_t uimm5) { + Csrrci(Zero, csr, uimm5); +} + +void Riscv64Assembler::Loadb(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lb>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadh(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lh>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadw(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lw>(rd, rs1, offset); +} + 
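The `Loadb`/`Loadw`/`Storeb`/... macros here delegate to `LoadFromOffset<>`/`StoreToOffset<>` templates that are not visible in this hunk. Assuming they build on the `SplitOffset()` helper defined near the top of this file, a load whose offset does not fit in the 12-bit immediate can be expected to expand roughly as in this sketch (written as if placed in the same file so that `SplitOffset()` and the scratch register `TMP` are in scope; the real template may differ):

// Illustrative only: expected expansion of a word load with a large offset.
static void LoadwLargeOffsetSketch(Riscv64Assembler* assembler,
                                   XRegister rd, XRegister rs1, int32_t offset) {
  if (IsInt<12>(offset)) {
    assembler->Lw(rd, rs1, offset);        // Short offset: a single LW.
  } else {
    auto [imm20, short_offset] = SplitOffset(offset);
    assembler->Lui(TMP, imm20);            // Materialize the offset rounded to 4 KiB.
    assembler->Add(TMP, TMP, rs1);         // TMP = rs1 + rounded offset.
    assembler->Lw(rd, TMP, short_offset);  // Remainder is in [-0x800, 0x800).
  }
}
// Worked example: offset 0x1fff splits into imm20 = 0x2 and short_offset = -1,
// so the sequence loads from rs1 + 0x2000 - 1 = rs1 + 0x1fff.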
+void Riscv64Assembler::Loadd(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Ld>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadbu(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lbu>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadhu(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lhu>(rd, rs1, offset); +} + +void Riscv64Assembler::Loadwu(XRegister rd, XRegister rs1, int32_t offset) { + LoadFromOffset<&Riscv64Assembler::Lwu>(rd, rs1, offset); +} + +void Riscv64Assembler::Storeb(XRegister rs2, XRegister rs1, int32_t offset) { + StoreToOffset<&Riscv64Assembler::Sb>(rs2, rs1, offset); +} + +void Riscv64Assembler::Storeh(XRegister rs2, XRegister rs1, int32_t offset) { + StoreToOffset<&Riscv64Assembler::Sh>(rs2, rs1, offset); +} + +void Riscv64Assembler::Storew(XRegister rs2, XRegister rs1, int32_t offset) { + StoreToOffset<&Riscv64Assembler::Sw>(rs2, rs1, offset); +} + +void Riscv64Assembler::Stored(XRegister rs2, XRegister rs1, int32_t offset) { + StoreToOffset<&Riscv64Assembler::Sd>(rs2, rs1, offset); +} + +void Riscv64Assembler::FLoadw(FRegister rd, XRegister rs1, int32_t offset) { + FLoadFromOffset<&Riscv64Assembler::FLw>(rd, rs1, offset); +} + +void Riscv64Assembler::FLoadd(FRegister rd, XRegister rs1, int32_t offset) { + FLoadFromOffset<&Riscv64Assembler::FLd>(rd, rs1, offset); +} + +void Riscv64Assembler::FStorew(FRegister rs2, XRegister rs1, int32_t offset) { + FStoreToOffset<&Riscv64Assembler::FSw>(rs2, rs1, offset); +} + +void Riscv64Assembler::FStored(FRegister rs2, XRegister rs1, int32_t offset) { + FStoreToOffset<&Riscv64Assembler::FSd>(rs2, rs1, offset); +} + +void Riscv64Assembler::LoadConst32(XRegister rd, int32_t value) { + // No need to use a temporary register for 32-bit values. + LoadImmediate(rd, value, /*can_use_tmp=*/ false); +} + +void Riscv64Assembler::LoadConst64(XRegister rd, int64_t value) { + LoadImmediate(rd, value, /*can_use_tmp=*/ true); +} + +template <typename ValueType, typename Addi, typename AddLarge> +void AddConstImpl(Riscv64Assembler* assembler, + XRegister rd, + XRegister rs1, + ValueType value, + Addi&& addi, + AddLarge&& add_large) { + ScratchRegisterScope srs(assembler); + // A temporary must be available for adjustment even if it's not needed. + // However, `rd` can be used as the temporary unless it's the same as `rs1` or SP. 
+ DCHECK_IMPLIES(rd == rs1 || rd == SP, srs.AvailableXRegisters() != 0u); + + if (IsInt<12>(value)) { + addi(rd, rs1, value); + return; + } + + constexpr int32_t kPositiveValueSimpleAdjustment = 0x7ff; + constexpr int32_t kHighestValueForSimpleAdjustment = 2 * kPositiveValueSimpleAdjustment; + constexpr int32_t kNegativeValueSimpleAdjustment = -0x800; + constexpr int32_t kLowestValueForSimpleAdjustment = 2 * kNegativeValueSimpleAdjustment; + + if (rd != rs1 && rd != SP) { + srs.IncludeXRegister(rd); + } + XRegister tmp = srs.AllocateXRegister(); + if (value >= 0 && value <= kHighestValueForSimpleAdjustment) { + addi(tmp, rs1, kPositiveValueSimpleAdjustment); + addi(rd, tmp, value - kPositiveValueSimpleAdjustment); + } else if (value < 0 && value >= kLowestValueForSimpleAdjustment) { + addi(tmp, rs1, kNegativeValueSimpleAdjustment); + addi(rd, tmp, value - kNegativeValueSimpleAdjustment); + } else { + add_large(rd, rs1, value, tmp); + } +} + +void Riscv64Assembler::AddConst32(XRegister rd, XRegister rs1, int32_t value) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u); + auto addiw = [&](XRegister rd, XRegister rs1, int32_t value) { Addiw(rd, rs1, value); }; + auto add_large = [&](XRegister rd, XRegister rs1, int32_t value, XRegister tmp) { + LoadConst32(tmp, value); + Addw(rd, rs1, tmp); + }; + AddConstImpl(this, rd, rs1, value, addiw, add_large); +} + +void Riscv64Assembler::AddConst64(XRegister rd, XRegister rs1, int64_t value) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u); + auto addi = [&](XRegister rd, XRegister rs1, int32_t value) { Addi(rd, rs1, value); }; + auto add_large = [&](XRegister rd, XRegister rs1, int64_t value, XRegister tmp) { + // We may not have another scratch register for `LoadConst64()`, so use `Li()`. + // TODO(riscv64): Refactor `LoadImmediate()` so that we can reuse the code to detect + // when the code path using the scratch reg is beneficial, and use that path with a + // small modification - instead of adding the two parts together, add them individually + // to the input `rs1`. (This works as long as `rd` is not the same as `tmp`.)

+ Li(tmp, value); + Add(rd, rs1, tmp); + }; + AddConstImpl(this, rd, rs1, value, addi, add_large); +} + +void Riscv64Assembler::Beqz(XRegister rs, Riscv64Label* label, bool is_bare) { + Beq(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bnez(XRegister rs, Riscv64Label* label, bool is_bare) { + Bne(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Blez(XRegister rs, Riscv64Label* label, bool is_bare) { + Ble(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bgez(XRegister rs, Riscv64Label* label, bool is_bare) { + Bge(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bltz(XRegister rs, Riscv64Label* label, bool is_bare) { + Blt(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Bgtz(XRegister rs, Riscv64Label* label, bool is_bare) { + Bgt(rs, Zero, label, is_bare); +} + +void Riscv64Assembler::Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondEQ, rs, rt); +} + +void Riscv64Assembler::Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondNE, rs, rt); +} + +void Riscv64Assembler::Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLE, rs, rt); +} + +void Riscv64Assembler::Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGE, rs, rt); +} + +void Riscv64Assembler::Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLT, rs, rt); +} + +void Riscv64Assembler::Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGT, rs, rt); +} + +void Riscv64Assembler::Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLEU, rs, rt); +} + +void Riscv64Assembler::Bgeu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGEU, rs, rt); +} + +void Riscv64Assembler::Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondLTU, rs, rt); +} + +void Riscv64Assembler::Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare) { + Bcond(label, is_bare, kCondGTU, rs, rt); +} + +void Riscv64Assembler::Jal(XRegister rd, Riscv64Label* label, bool is_bare) { + Buncond(label, rd, is_bare); +} + +void Riscv64Assembler::J(Riscv64Label* label, bool is_bare) { + Jal(Zero, label, is_bare); +} + +void Riscv64Assembler::Jal(Riscv64Label* label, bool is_bare) { + Jal(RA, label, is_bare); +} + +void Riscv64Assembler::Loadw(XRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + LoadLiteral(literal, rd, Branch::kLiteral); +} + +void Riscv64Assembler::Loadwu(XRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + LoadLiteral(literal, rd, Branch::kLiteralUnsigned); +} + +void Riscv64Assembler::Loadd(XRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 8u); + LoadLiteral(literal, rd, Branch::kLiteralLong); +} + +void Riscv64Assembler::FLoadw(FRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 4u); + LoadLiteral(literal, rd, Branch::kLiteralFloat); +} + +void Riscv64Assembler::FLoadd(FRegister rd, Literal* literal) { + DCHECK_EQ(literal->GetSize(), 8u); + LoadLiteral(literal, rd, Branch::kLiteralDouble); +} + +void Riscv64Assembler::Unimp() { + // TODO(riscv64): use 16-bit zero C.UNIMP once we support compression + Emit(0xC0001073); +} + +/////////////////////////////// RV64 MACRO Instructions END /////////////////////////////// + +const 
Riscv64Assembler::Branch::BranchInfo Riscv64Assembler::Branch::branch_info_[] = { + // Short branches (can be promoted to longer). + {4, 0, Riscv64Assembler::Branch::kOffset13}, // kCondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kUncondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kCall + // Short branches (can't be promoted to longer). + {4, 0, Riscv64Assembler::Branch::kOffset13}, // kBareCondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kBareUncondBranch + {4, 0, Riscv64Assembler::Branch::kOffset21}, // kBareCall + + // Medium branch. + {8, 4, Riscv64Assembler::Branch::kOffset21}, // kCondBranch21 + + // Long branches. + {12, 4, Riscv64Assembler::Branch::kOffset32}, // kLongCondBranch + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLongUncondBranch + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLongCall + + // label. + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLabel + + // literals. + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteral + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralUnsigned + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralLong + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralFloat + {8, 0, Riscv64Assembler::Branch::kOffset32}, // kLiteralDouble +}; + +void Riscv64Assembler::Branch::InitShortOrLong(Riscv64Assembler::Branch::OffsetBits offset_size, + Riscv64Assembler::Branch::Type short_type, + Riscv64Assembler::Branch::Type long_type, + Riscv64Assembler::Branch::Type longest_type) { + Riscv64Assembler::Branch::Type type = short_type; + if (offset_size > branch_info_[type].offset_size) { + type = long_type; + if (offset_size > branch_info_[type].offset_size) { + type = longest_type; + } + } + type_ = type; +} + +void Riscv64Assembler::Branch::InitializeType(Type initial_type) { + OffsetBits offset_size_needed = GetOffsetSizeNeeded(location_, target_); + + switch (initial_type) { + case kCondBranch: + if (condition_ != kUncond) { + InitShortOrLong(offset_size_needed, kCondBranch, kCondBranch21, kLongCondBranch); + break; + } + FALLTHROUGH_INTENDED; + case kUncondBranch: + InitShortOrLong(offset_size_needed, kUncondBranch, kLongUncondBranch, kLongUncondBranch); + break; + case kCall: + InitShortOrLong(offset_size_needed, kCall, kLongCall, kLongCall); + break; + case kBareCondBranch: + if (condition_ != kUncond) { + type_ = kBareCondBranch; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + } + FALLTHROUGH_INTENDED; + case kBareUncondBranch: + type_ = kBareUncondBranch; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + case kBareCall: + type_ = kBareCall; + CHECK_LE(offset_size_needed, GetOffsetSize()); + break; + case kLabel: + type_ = initial_type; + break; + case kLiteral: + case kLiteralUnsigned: + case kLiteralLong: + case kLiteralFloat: + case kLiteralDouble: + CHECK(!IsResolved()); + type_ = initial_type; + break; + default: + LOG(FATAL) << "Unexpected branch type " << enum_cast<uint32_t>(initial_type); + UNREACHABLE(); + } + + old_type_ = type_; +} + +bool Riscv64Assembler::Branch::IsNop(BranchCondition condition, XRegister lhs, XRegister rhs) { + switch (condition) { + case kCondNE: + case kCondLT: + case kCondGT: + case kCondLTU: + case kCondGTU: + return lhs == rhs; + default: + return false; + } +} + +bool Riscv64Assembler::Branch::IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs) { + switch (condition) { + case kUncond: + return true; + case kCondEQ: + case kCondGE: + case kCondLE: + case kCondLEU: + case kCondGEU: + return lhs == rhs; + 
default: + return false; + } +} + +Riscv64Assembler::Branch::Branch(uint32_t location, uint32_t target, XRegister rd, bool is_bare) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(rd), + rhs_reg_(Zero), + freg_(kNoFRegister), + condition_(kUncond) { + InitializeType( + (rd != Zero ? (is_bare ? kBareCall : kCall) : (is_bare ? kBareUncondBranch : kUncondBranch))); +} + +Riscv64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + Riscv64Assembler::BranchCondition condition, + XRegister lhs_reg, + XRegister rhs_reg, + bool is_bare) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(lhs_reg), + rhs_reg_(rhs_reg), + freg_(kNoFRegister), + condition_(condition) { + DCHECK_NE(condition, kUncond); + DCHECK(!IsNop(condition, lhs_reg, rhs_reg)); + DCHECK(!IsUncond(condition, lhs_reg, rhs_reg)); + InitializeType(is_bare ? kBareCondBranch : kCondBranch); +} + +Riscv64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + XRegister rd, + Type label_or_literal_type) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(rd), + rhs_reg_(Zero), + freg_(kNoFRegister), + condition_(kUncond) { + CHECK_NE(rd , Zero); + InitializeType(label_or_literal_type); +} + +Riscv64Assembler::Branch::Branch(uint32_t location, + uint32_t target, + FRegister rd, + Type literal_type) + : old_location_(location), + location_(location), + target_(target), + lhs_reg_(Zero), + rhs_reg_(Zero), + freg_(rd), + condition_(kUncond) { + InitializeType(literal_type); +} + +Riscv64Assembler::BranchCondition Riscv64Assembler::Branch::OppositeCondition( + Riscv64Assembler::BranchCondition cond) { + switch (cond) { + case kCondEQ: + return kCondNE; + case kCondNE: + return kCondEQ; + case kCondLT: + return kCondGE; + case kCondGE: + return kCondLT; + case kCondLE: + return kCondGT; + case kCondGT: + return kCondLE; + case kCondLTU: + return kCondGEU; + case kCondGEU: + return kCondLTU; + case kCondLEU: + return kCondGTU; + case kCondGTU: + return kCondLEU; + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << enum_cast<uint32_t>(cond); + UNREACHABLE(); + } +} + +Riscv64Assembler::Branch::Type Riscv64Assembler::Branch::GetType() const { return type_; } + +Riscv64Assembler::BranchCondition Riscv64Assembler::Branch::GetCondition() const { + return condition_; +} + +XRegister Riscv64Assembler::Branch::GetLeftRegister() const { return lhs_reg_; } + +XRegister Riscv64Assembler::Branch::GetRightRegister() const { return rhs_reg_; } + +FRegister Riscv64Assembler::Branch::GetFRegister() const { return freg_; } + +uint32_t Riscv64Assembler::Branch::GetTarget() const { return target_; } + +uint32_t Riscv64Assembler::Branch::GetLocation() const { return location_; } + +uint32_t Riscv64Assembler::Branch::GetOldLocation() const { return old_location_; } + +uint32_t Riscv64Assembler::Branch::GetLength() const { return branch_info_[type_].length; } + +uint32_t Riscv64Assembler::Branch::GetOldLength() const { return branch_info_[old_type_].length; } + +uint32_t Riscv64Assembler::Branch::GetEndLocation() const { return GetLocation() + GetLength(); } + +uint32_t Riscv64Assembler::Branch::GetOldEndLocation() const { + return GetOldLocation() + GetOldLength(); +} + +bool Riscv64Assembler::Branch::IsBare() const { + switch (type_) { + case kBareUncondBranch: + case kBareCondBranch: + case kBareCall: + return true; + default: + return false; + } +} + +bool Riscv64Assembler::Branch::IsResolved() const { return target_ != kUnresolved; } + 
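The three offset tiers in the `branch_info_` table above (13, 21, and 32 bits) correspond to a plain conditional branch, a JAL, and an AUIPC+JALR pair, respectively. As a rough standalone illustration of how a resolved PC-relative distance maps onto those tiers (a simplified restatement for readers; the real classification is `GetOffsetSizeNeeded()` below, and the names here are illustrative only):

#include <cstdint>
#include <cstdio>

enum class OffsetTier { kOffset13, kOffset21, kOffset32 };

// Signed reach of 13-, 21- and 32-bit PC-relative offsets.
static OffsetTier ClassifyDistanceSketch(int64_t distance) {
  if (distance >= -(INT64_C(1) << 12) && distance < (INT64_C(1) << 12)) {
    return OffsetTier::kOffset13;  // BEQ/BNE/... reach +/- 4 KiB.
  }
  if (distance >= -(INT64_C(1) << 20) && distance < (INT64_C(1) << 20)) {
    return OffsetTier::kOffset21;  // JAL reaches +/- 1 MiB.
  }
  return OffsetTier::kOffset32;    // AUIPC + JALR covers the rest.
}

int main() {
  // +100 bytes -> short conditional branch; +64 KiB -> medium form (opposite-condition
  // branch over a JAL); +4 MiB -> long form (opposite-condition branch over AUIPC+JALR).
  std::printf("%d %d %d\n",
              static_cast<int>(ClassifyDistanceSketch(100)),
              static_cast<int>(ClassifyDistanceSketch(64 * 1024)),
              static_cast<int>(ClassifyDistanceSketch(4 * 1024 * 1024)));
  return 0;
}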
+Riscv64Assembler::Branch::OffsetBits Riscv64Assembler::Branch::GetOffsetSize() const { + return branch_info_[type_].offset_size; +} + +Riscv64Assembler::Branch::OffsetBits Riscv64Assembler::Branch::GetOffsetSizeNeeded( + uint32_t location, uint32_t target) { + // For unresolved targets assume the shortest encoding + // (later it will be made longer if needed). + if (target == kUnresolved) { + return kOffset13; + } + int64_t distance = static_cast<int64_t>(target) - location; + if (IsInt<kOffset13>(distance)) { + return kOffset13; + } else if (IsInt<kOffset21>(distance)) { + return kOffset21; + } else { + return kOffset32; + } +} + +void Riscv64Assembler::Branch::Resolve(uint32_t target) { target_ = target; } + +void Riscv64Assembler::Branch::Relocate(uint32_t expand_location, uint32_t delta) { + // All targets should be resolved before we start promoting branches. + DCHECK(IsResolved()); + if (location_ > expand_location) { + location_ += delta; + } + if (target_ > expand_location) { + target_ += delta; + } +} + +uint32_t Riscv64Assembler::Branch::PromoteIfNeeded() { + // All targets should be resolved before we start promoting branches. + DCHECK(IsResolved()); + Type old_type = type_; + switch (type_) { + // Short branches (can be promoted to longer). + case kCondBranch: { + OffsetBits needed_size = GetOffsetSizeNeeded(GetOffsetLocation(), target_); + if (needed_size <= GetOffsetSize()) { + return 0u; + } + // The offset remains the same for `kCondBranch21` for forward branches. + DCHECK_EQ(branch_info_[kCondBranch21].length - branch_info_[kCondBranch21].pc_offset, + branch_info_[kCondBranch].length - branch_info_[kCondBranch].pc_offset); + if (target_ <= location_) { + // Calculate the needed size for kCondBranch21. + needed_size = + GetOffsetSizeNeeded(location_ + branch_info_[kCondBranch21].pc_offset, target_); + } + type_ = (needed_size <= branch_info_[kCondBranch21].offset_size) + ? kCondBranch21 + : kLongCondBranch; + break; + } + case kUncondBranch: + if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) { + return 0u; + } + type_ = kLongUncondBranch; + break; + case kCall: + if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) { + return 0u; + } + type_ = kLongCall; + break; + // Medium branch (can be promoted to long). + case kCondBranch21: + if (GetOffsetSizeNeeded(GetOffsetLocation(), target_) <= GetOffsetSize()) { + return 0u; + } + type_ = kLongCondBranch; + break; + default: + // Other branch types cannot be promoted. + DCHECK_LE(GetOffsetSizeNeeded(GetOffsetLocation(), target_), GetOffsetSize()) << type_; + return 0u; + } + DCHECK(type_ != old_type); + DCHECK_GT(branch_info_[type_].length, branch_info_[old_type].length); + return branch_info_[type_].length - branch_info_[old_type].length; +} + +uint32_t Riscv64Assembler::Branch::GetOffsetLocation() const { + return location_ + branch_info_[type_].pc_offset; +} + +int32_t Riscv64Assembler::Branch::GetOffset() const { + CHECK(IsResolved()); + // Calculate the byte distance between instructions and also account for + // different PC-relative origins. 
+ uint32_t offset_location = GetOffsetLocation(); + int32_t offset = static_cast<int32_t>(target_ - offset_location); + DCHECK_EQ(offset, static_cast<int64_t>(target_) - static_cast<int64_t>(offset_location)); + return offset; +} + +void Riscv64Assembler::EmitBcond(BranchCondition cond, + XRegister rs, + XRegister rt, + int32_t offset) { + switch (cond) { +#define DEFINE_CASE(COND, cond) \ + case kCond##COND: \ + B##cond(rs, rt, offset); \ + break; + DEFINE_CASE(EQ, eq) + DEFINE_CASE(NE, ne) + DEFINE_CASE(LT, lt) + DEFINE_CASE(GE, ge) + DEFINE_CASE(LE, le) + DEFINE_CASE(GT, gt) + DEFINE_CASE(LTU, ltu) + DEFINE_CASE(GEU, geu) + DEFINE_CASE(LEU, leu) + DEFINE_CASE(GTU, gtu) +#undef DEFINE_CASE + case kUncond: + LOG(FATAL) << "Unexpected branch condition " << enum_cast<uint32_t>(cond); + UNREACHABLE(); + } +} + +void Riscv64Assembler::EmitBranch(Riscv64Assembler::Branch* branch) { + CHECK(overwriting_); + overwrite_location_ = branch->GetLocation(); + const int32_t offset = branch->GetOffset(); + BranchCondition condition = branch->GetCondition(); + XRegister lhs = branch->GetLeftRegister(); + XRegister rhs = branch->GetRightRegister(); + + auto emit_auipc_and_next = [&](XRegister reg, auto next) { + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + auto [imm20, short_offset] = SplitOffset(offset); + Auipc(reg, imm20); + next(short_offset); + }; + + switch (branch->GetType()) { + // Short branches. + case Branch::kUncondBranch: + case Branch::kBareUncondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + J(offset); + break; + case Branch::kCondBranch: + case Branch::kBareCondBranch: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + EmitBcond(condition, lhs, rhs, offset); + break; + case Branch::kCall: + case Branch::kBareCall: + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + DCHECK(lhs != Zero); + Jal(lhs, offset); + break; + + // Medium branch. + case Branch::kCondBranch21: + EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, branch->GetLength()); + CHECK_EQ(overwrite_location_, branch->GetOffsetLocation()); + J(offset); + break; + + // Long branches. + case Branch::kLongCondBranch: + EmitBcond(Branch::OppositeCondition(condition), lhs, rhs, branch->GetLength()); + FALLTHROUGH_INTENDED; + case Branch::kLongUncondBranch: + emit_auipc_and_next(TMP, [&](int32_t short_offset) { Jalr(Zero, TMP, short_offset); }); + break; + case Branch::kLongCall: + DCHECK(lhs != Zero); + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Jalr(lhs, lhs, short_offset); }); + break; + + // label. + case Branch::kLabel: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Addi(lhs, lhs, short_offset); }); + break; + // literals. 
+ case Branch::kLiteral: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Lw(lhs, lhs, short_offset); }); + break; + case Branch::kLiteralUnsigned: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Lwu(lhs, lhs, short_offset); }); + break; + case Branch::kLiteralLong: + emit_auipc_and_next(lhs, [&](int32_t short_offset) { Ld(lhs, lhs, short_offset); }); + break; + case Branch::kLiteralFloat: + emit_auipc_and_next( + TMP, [&](int32_t short_offset) { FLw(branch->GetFRegister(), TMP, short_offset); }); + break; + case Branch::kLiteralDouble: + emit_auipc_and_next( + TMP, [&](int32_t short_offset) { FLd(branch->GetFRegister(), TMP, short_offset); }); + break; + } + CHECK_EQ(overwrite_location_, branch->GetEndLocation()); + CHECK_LE(branch->GetLength(), static_cast<uint32_t>(Branch::kMaxBranchLength)); +} + +void Riscv64Assembler::EmitBranches() { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (branch placeholders) in the buffer. + overwriting_ = true; + for (auto& branch : branches_) { + EmitBranch(&branch); + } + overwriting_ = false; +} + +void Riscv64Assembler::FinalizeLabeledBranch(Riscv64Label* label) { + // TODO(riscv64): Support "C" Standard Extension - length may not be a multiple of 4. + DCHECK_ALIGNED(branches_.back().GetLength(), sizeof(uint32_t)); + uint32_t length = branches_.back().GetLength() / sizeof(uint32_t); + if (!label->IsBound()) { + // Branch forward (to a following label), distance is unknown. + // The first branch forward will contain 0, serving as the terminator of + // the list of forward-reaching branches. + Emit(label->position_); + length--; + // Now make the label object point to this branch + // (this forms a linked list of branches preceding this label). + uint32_t branch_id = branches_.size() - 1; + label->LinkTo(branch_id); + } + // Reserve space for the branch. + for (; length != 0u; --length) { + Nop(); + } +} + +void Riscv64Assembler::Bcond( + Riscv64Label* label, bool is_bare, BranchCondition condition, XRegister lhs, XRegister rhs) { + // TODO(riscv64): Should an assembler perform these optimizations, or should we remove them? + // If lhs = rhs, this can be a NOP. + if (Branch::IsNop(condition, lhs, rhs)) { + return; + } + if (Branch::IsUncond(condition, lhs, rhs)) { + Buncond(label, Zero, is_bare); + return; + } + + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, condition, lhs, rhs, is_bare); + FinalizeLabeledBranch(label); +} + +void Riscv64Assembler::Buncond(Riscv64Label* label, XRegister rd, bool is_bare) { + uint32_t target = label->IsBound() ? 
GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, rd, is_bare); + FinalizeLabeledBranch(label); +} + +template <typename XRegisterOrFRegister> +void Riscv64Assembler::LoadLiteral(Literal* literal, + XRegisterOrFRegister rd, + Branch::Type literal_type) { + Riscv64Label* label = literal->GetLabel(); + DCHECK(!label->IsBound()); + branches_.emplace_back(buffer_.Size(), Branch::kUnresolved, rd, literal_type); + FinalizeLabeledBranch(label); +} + +Riscv64Assembler::Branch* Riscv64Assembler::GetBranch(uint32_t branch_id) { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +const Riscv64Assembler::Branch* Riscv64Assembler::GetBranch(uint32_t branch_id) const { + CHECK_LT(branch_id, branches_.size()); + return &branches_[branch_id]; +} + +void Riscv64Assembler::Bind(Riscv64Label* label) { + CHECK(!label->IsBound()); + uint32_t bound_pc = buffer_.Size(); + + // Walk the list of branches referring to and preceding this label. + // Store the previously unknown target addresses in them. + while (label->IsLinked()) { + uint32_t branch_id = label->Position(); + Branch* branch = GetBranch(branch_id); + branch->Resolve(bound_pc); + + uint32_t branch_location = branch->GetLocation(); + // Extract the location of the previous branch in the list (walking the list backwards; + // the previous branch ID was stored in the space reserved for this branch). + uint32_t prev = buffer_.Load<uint32_t>(branch_location); + + // On to the previous branch in the list... + label->position_ = prev; + } + + // Now make the label object contain its own location (relative to the end of the preceding + // branch, if any; it will be used by the branches referring to and following this label). + uint32_t prev_branch_id = Riscv64Label::kNoPrevBranchId; + if (!branches_.empty()) { + prev_branch_id = branches_.size() - 1u; + const Branch* prev_branch = GetBranch(prev_branch_id); + bound_pc -= prev_branch->GetEndLocation(); + } + label->prev_branch_id_ = prev_branch_id; + label->BindTo(bound_pc); +} + +void Riscv64Assembler::LoadLabelAddress(XRegister rd, Riscv64Label* label) { + DCHECK_NE(rd, Zero); + uint32_t target = label->IsBound() ? GetLabelLocation(label) : Branch::kUnresolved; + branches_.emplace_back(buffer_.Size(), target, rd, Branch::kLabel); + FinalizeLabeledBranch(label); +} + +Literal* Riscv64Assembler::NewLiteral(size_t size, const uint8_t* data) { + // We don't support byte and half-word literals. + if (size == 4u) { + literals_.emplace_back(size, data); + return &literals_.back(); + } else { + DCHECK_EQ(size, 8u); + long_literals_.emplace_back(size, data); + return &long_literals_.back(); + } +} + +JumpTable* Riscv64Assembler::CreateJumpTable(ArenaVector<Riscv64Label*>&& labels) { + jump_tables_.emplace_back(std::move(labels)); + JumpTable* table = &jump_tables_.back(); + DCHECK(!table->GetLabel()->IsBound()); + return table; +} + +uint32_t Riscv64Assembler::GetLabelLocation(const Riscv64Label* label) const { + CHECK(label->IsBound()); + uint32_t target = label->Position(); + if (label->prev_branch_id_ != Riscv64Label::kNoPrevBranchId) { + // Get label location based on the branch preceding it. + const Branch* prev_branch = GetBranch(label->prev_branch_id_); + target += prev_branch->GetEndLocation(); + } + return target; +} + +uint32_t Riscv64Assembler::GetAdjustedPosition(uint32_t old_position) { + // We can reconstruct the adjustment by going through all the branches from the beginning + // up to the `old_position`. 
Since we expect `GetAdjustedPosition()` to be called in a loop + // with increasing `old_position`, we can use the data from last `GetAdjustedPosition()` to + // continue where we left off and the whole loop should be O(m+n) where m is the number + // of positions to adjust and n is the number of branches. + if (old_position < last_old_position_) { + last_position_adjustment_ = 0; + last_old_position_ = 0; + last_branch_id_ = 0; + } + while (last_branch_id_ != branches_.size()) { + const Branch* branch = GetBranch(last_branch_id_); + if (branch->GetLocation() >= old_position + last_position_adjustment_) { + break; + } + last_position_adjustment_ += branch->GetLength() - branch->GetOldLength(); + ++last_branch_id_; + } + last_old_position_ = old_position; + return old_position + last_position_adjustment_; +} + +void Riscv64Assembler::ReserveJumpTableSpace() { + if (!jump_tables_.empty()) { + for (JumpTable& table : jump_tables_) { + Riscv64Label* label = table.GetLabel(); + Bind(label); + + // Bulk ensure capacity, as this may be large. + size_t orig_size = buffer_.Size(); + size_t required_capacity = orig_size + table.GetSize(); + if (required_capacity > buffer_.Capacity()) { + buffer_.ExtendCapacity(required_capacity); + } +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = true; +#endif + + // Fill the space with placeholder data as the data is not final + // until the branches have been promoted. And we shouldn't + // be moving uninitialized data during branch promotion. + for (size_t cnt = table.GetData().size(), i = 0; i < cnt; ++i) { + buffer_.Emit<uint32_t>(0x1abe1234u); + } + +#ifndef NDEBUG + buffer_.has_ensured_capacity_ = false; +#endif + } + } +} + +void Riscv64Assembler::PromoteBranches() { + // Promote short branches to long as necessary. + bool changed; + do { + changed = false; + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + uint32_t delta = branch.PromoteIfNeeded(); + // If this branch has been promoted and needs to expand in size, + // relocate all branches by the expansion size. + if (delta != 0u) { + changed = true; + uint32_t expand_location = branch.GetLocation(); + for (auto& branch2 : branches_) { + branch2.Relocate(expand_location, delta); + } + } + } + } while (changed); + + // Account for branch expansion by resizing the code buffer + // and moving the code in it to its final location. + size_t branch_count = branches_.size(); + if (branch_count > 0) { + // Resize. + Branch& last_branch = branches_[branch_count - 1]; + uint32_t size_delta = last_branch.GetEndLocation() - last_branch.GetOldEndLocation(); + uint32_t old_size = buffer_.Size(); + buffer_.Resize(old_size + size_delta); + // Move the code residing between branch placeholders. + uint32_t end = old_size; + for (size_t i = branch_count; i > 0;) { + Branch& branch = branches_[--i]; + uint32_t size = end - branch.GetOldEndLocation(); + buffer_.Move(branch.GetEndLocation(), branch.GetOldEndLocation(), size); + end = branch.GetOldLocation(); + } + } + + // Align 64-bit literals by moving them up by 4 bytes if needed. + // This can increase the PC-relative distance but all literals are accessed with AUIPC+Load(imm12) + // without branch promotion, so this late adjustment cannot take them out of instruction range. + if (!long_literals_.empty()) { + uint32_t first_literal_location = GetLabelLocation(long_literals_.front().GetLabel()); + size_t lit_size = long_literals_.size() * sizeof(uint64_t); + size_t buf_size = buffer_.Size(); + // 64-bit literals must be at the very end of the buffer. 
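+    // For example, if the instruction stream ends at a 4-byte (but not 8-byte) aligned
+    // position, a single padding word is inserted before the first 64-bit literal below and
+    // every branch target and literal label at or beyond that point is shifted by 4 bytes.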
+ CHECK_EQ(first_literal_location + lit_size, buf_size); + if (!IsAligned<sizeof(uint64_t)>(first_literal_location)) { + // Insert the padding. + buffer_.Resize(buf_size + sizeof(uint32_t)); + buffer_.Move(first_literal_location + sizeof(uint32_t), first_literal_location, lit_size); + DCHECK(!overwriting_); + overwriting_ = true; + overwrite_location_ = first_literal_location; + Emit(0); // Illegal instruction. + overwriting_ = false; + // Increase target addresses in literal and address loads by 4 bytes in order for correct + // offsets from PC to be generated. + for (auto& branch : branches_) { + uint32_t target = branch.GetTarget(); + if (target >= first_literal_location) { + branch.Resolve(target + sizeof(uint32_t)); + } + } + // If after this we ever call GetLabelLocation() to get the location of a 64-bit literal, + // we need to adjust the location of the literal's label as well. + for (Literal& literal : long_literals_) { + // Bound label's position is negative, hence decrementing it instead of incrementing. + literal.GetLabel()->position_ -= sizeof(uint32_t); + } + } + } +} + +void Riscv64Assembler::PatchCFI() { + if (cfi().NumberOfDelayedAdvancePCs() == 0u) { + return; + } + + using DelayedAdvancePC = DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC; + const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC(); + const std::vector<uint8_t>& old_stream = data.first; + const std::vector<DelayedAdvancePC>& advances = data.second; + + // Refill our data buffer with patched opcodes. + static constexpr size_t kExtraSpace = 16; // Not every PC advance can be encoded in one byte. + cfi().ReserveCFIStream(old_stream.size() + advances.size() + kExtraSpace); + size_t stream_pos = 0; + for (const DelayedAdvancePC& advance : advances) { + DCHECK_GE(advance.stream_pos, stream_pos); + // Copy old data up to the point where advance was issued. + cfi().AppendRawData(old_stream, stream_pos, advance.stream_pos); + stream_pos = advance.stream_pos; + // Insert the advance command with its final offset. + size_t final_pc = GetAdjustedPosition(advance.pc); + cfi().AdvancePC(final_pc); + } + // Copy the final segment if any. + cfi().AppendRawData(old_stream, stream_pos, old_stream.size()); +} + +void Riscv64Assembler::EmitJumpTables() { + if (!jump_tables_.empty()) { + CHECK(!overwriting_); + // Switch from appending instructions at the end of the buffer to overwriting + // existing instructions (here, jump tables) in the buffer. + overwriting_ = true; + + for (JumpTable& table : jump_tables_) { + Riscv64Label* table_label = table.GetLabel(); + uint32_t start = GetLabelLocation(table_label); + overwrite_location_ = start; + + for (Riscv64Label* target : table.GetData()) { + CHECK_EQ(buffer_.Load<uint32_t>(overwrite_location_), 0x1abe1234u); + // The table will contain target addresses relative to the table start. + uint32_t offset = GetLabelLocation(target) - start; + Emit(offset); + } + } + + overwriting_ = false; + } +} + +void Riscv64Assembler::EmitLiterals() { + if (!literals_.empty()) { + for (Literal& literal : literals_) { + Riscv64Label* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK_EQ(literal.GetSize(), 4u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } + if (!long_literals_.empty()) { + // These need to be 8-byte-aligned but we shall add the alignment padding after the branch + // promotion, if needed. 
Since all literals are accessed with AUIPC+Load(imm12) without branch + // promotion, this late adjustment cannot take long literals out of instruction range. + for (Literal& literal : long_literals_) { + Riscv64Label* label = literal.GetLabel(); + Bind(label); + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + DCHECK_EQ(literal.GetSize(), 8u); + for (size_t i = 0, size = literal.GetSize(); i != size; ++i) { + buffer_.Emit<uint8_t>(literal.GetData()[i]); + } + } + } +} + +// This method is used to adjust the base register and offset pair for +// a load/store when the offset doesn't fit into 12-bit signed integer. +void Riscv64Assembler::AdjustBaseAndOffset(XRegister& base, + int32_t& offset, + ScratchRegisterScope& srs) { + // A scratch register must be available for adjustment even if it's not needed. + CHECK_NE(srs.AvailableXRegisters(), 0u); + if (IsInt<12>(offset)) { + return; + } + + constexpr int32_t kPositiveOffsetMaxSimpleAdjustment = 0x7ff; + constexpr int32_t kHighestOffsetForSimpleAdjustment = 2 * kPositiveOffsetMaxSimpleAdjustment; + constexpr int32_t kPositiveOffsetSimpleAdjustmentAligned8 = + RoundDown(kPositiveOffsetMaxSimpleAdjustment, 8); + constexpr int32_t kPositiveOffsetSimpleAdjustmentAligned4 = + RoundDown(kPositiveOffsetMaxSimpleAdjustment, 4); + constexpr int32_t kNegativeOffsetSimpleAdjustment = -0x800; + constexpr int32_t kLowestOffsetForSimpleAdjustment = 2 * kNegativeOffsetSimpleAdjustment; + + XRegister tmp = srs.AllocateXRegister(); + if (offset >= 0 && offset <= kHighestOffsetForSimpleAdjustment) { + // Make the adjustment 8-byte aligned (0x7f8) except for offsets that cannot be reached + // with this adjustment, then try 4-byte alignment, then just half of the offset. + int32_t adjustment = IsInt<12>(offset - kPositiveOffsetSimpleAdjustmentAligned8) + ? kPositiveOffsetSimpleAdjustmentAligned8 + : IsInt<12>(offset - kPositiveOffsetSimpleAdjustmentAligned4) + ? kPositiveOffsetSimpleAdjustmentAligned4 + : offset / 2; + DCHECK(IsInt<12>(adjustment)); + Addi(tmp, base, adjustment); + offset -= adjustment; + } else if (offset < 0 && offset >= kLowestOffsetForSimpleAdjustment) { + Addi(tmp, base, kNegativeOffsetSimpleAdjustment); + offset -= kNegativeOffsetSimpleAdjustment; + } else if (offset >= 0x7ffff800) { + // Support even large offsets outside the range supported by `SplitOffset()`. + LoadConst32(tmp, offset); + Add(tmp, tmp, base); + offset = 0; + } else { + auto [imm20, short_offset] = SplitOffset(offset); + Lui(tmp, imm20); + Add(tmp, tmp, base); + offset = short_offset; + } + base = tmp; +} + +template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> +void Riscv64Assembler::LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + // If `rd` differs from `rs1`, allow using it as a temporary if needed. 
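+  // (The load overwrites `rd` anyway, so its old value is not needed and it can serve as an
+  // extra scratch register for the base/offset adjustment, as long as it is not the base.)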
+ if (rd != rs1) { + srs.IncludeXRegister(rd); + } + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rd, rs1, offset); +} + +template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> +void Riscv64Assembler::StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + CHECK_EQ((1u << rs2) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rs2, rs1, offset); +} + +template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> +void Riscv64Assembler::FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rd, rs1, offset); +} + +template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> +void Riscv64Assembler::FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset) { + CHECK_EQ((1u << rs1) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + AdjustBaseAndOffset(rs1, offset, srs); + (this->*insn)(rs2, rs1, offset); +} + +void Riscv64Assembler::LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp) { + CHECK_EQ((1u << rd) & available_scratch_core_registers_, 0u); + ScratchRegisterScope srs(this); + CHECK_IMPLIES(can_use_tmp, srs.AvailableXRegisters() != 0u); + + // Helper lambdas. + auto addi = [&](XRegister rd, XRegister rs, int32_t imm) { Addi(rd, rs, imm); }; + auto addiw = [&](XRegister rd, XRegister rs, int32_t imm) { Addiw(rd, rs, imm); }; + auto slli = [&](XRegister rd, XRegister rs, int32_t imm) { Slli(rd, rs, imm); }; + auto lui = [&](XRegister rd, uint32_t imm20) { Lui(rd, imm20); }; + + // Simple LUI+ADDI/W can handle value range [-0x80000800, 0x7fffffff]. + auto is_simple_li_value = [](int64_t value) { + return value >= INT64_C(-0x80000800) && value <= INT64_C(0x7fffffff); + }; + auto emit_simple_li_helper = [&](XRegister rd, + int64_t value, + auto&& addi, + auto&& addiw, + auto&& slli, + auto&& lui) { + DCHECK(is_simple_li_value(value)) << "0x" << std::hex << value; + if (IsInt<12>(value)) { + addi(rd, Zero, value); + } else if (CTZ(value) < 12 && IsInt(6 + CTZ(value), value)) { + // This path yields two 16-bit instructions with the "C" Standard Extension. + addi(rd, Zero, value >> CTZ(value)); + slli(rd, rd, CTZ(value)); + } else if (value < INT64_C(-0x80000000)) { + int32_t small_value = dchecked_integral_cast<int32_t>(value - INT64_C(-0x80000000)); + DCHECK(IsInt<12>(small_value)); + DCHECK_LT(small_value, 0); + lui(rd, 1u << 19); + addi(rd, rd, small_value); + } else { + DCHECK(IsInt<32>(value)); + // Note: Similar to `SplitOffset()` but we can target the full 32-bit range with ADDIW. + int64_t near_value = (value + 0x800) & ~0xfff; + int32_t small_value = value - near_value; + DCHECK(IsInt<12>(small_value)); + uint32_t imm20 = static_cast<uint32_t>(near_value) >> 12; + DCHECK_NE(imm20, 0u); // Small values are handled above. 
+ lui(rd, imm20); + if (small_value != 0) { + addiw(rd, rd, small_value); + } + } + }; + auto emit_simple_li = [&](XRegister rd, int64_t value) { + emit_simple_li_helper(rd, value, addi, addiw, slli, lui); + }; + auto count_simple_li_instructions = [&](int64_t value) { + size_t num_instructions = 0u; + auto count_rri = [&](XRegister, XRegister, int32_t) { ++num_instructions; }; + auto count_ru = [&](XRegister, uint32_t) { ++num_instructions; }; + emit_simple_li_helper(Zero, value, count_rri, count_rri, count_rri, count_ru); + return num_instructions; + }; + + // If LUI+ADDI/W is not enough, we can generate up to 3 SLLI+ADDI afterwards (up to 8 instructions + // total). The ADDI from the first SLLI+ADDI pair can be a no-op. + auto emit_with_slli_addi_helper = [&](XRegister rd, + int64_t value, + auto&& addi, + auto&& addiw, + auto&& slli, + auto&& lui) { + static constexpr size_t kMaxNumSllAddi = 3u; + int32_t addi_values[kMaxNumSllAddi]; + size_t sll_shamts[kMaxNumSllAddi]; + size_t num_sll_addi = 0u; + while (!is_simple_li_value(value)) { + DCHECK_LT(num_sll_addi, kMaxNumSllAddi); + // Prepare sign-extended low 12 bits for ADDI. + int64_t addi_value = (value & 0xfff) - ((value & 0x800) << 1); + DCHECK(IsInt<12>(addi_value)); + int64_t remaining = value - addi_value; + size_t shamt = CTZ(remaining); + DCHECK_GE(shamt, 12u); + addi_values[num_sll_addi] = addi_value; + sll_shamts[num_sll_addi] = shamt; + value = remaining >> shamt; + ++num_sll_addi; + } + if (num_sll_addi != 0u && IsInt<20>(value) && !IsInt<12>(value)) { + // If `sll_shamts[num_sll_addi - 1u]` was only 12, we would have stopped + // the decomposition a step earlier with smaller `num_sll_addi`. + DCHECK_GT(sll_shamts[num_sll_addi - 1u], 12u); + // Emit the signed 20-bit value with LUI and reduce the SLLI shamt by 12 to compensate. + sll_shamts[num_sll_addi - 1u] -= 12u; + lui(rd, dchecked_integral_cast<uint32_t>(value & 0xfffff)); + } else { + emit_simple_li_helper(rd, value, addi, addiw, slli, lui); + } + for (size_t i = num_sll_addi; i != 0u; ) { + --i; + slli(rd, rd, sll_shamts[i]); + if (addi_values[i] != 0) { + addi(rd, rd, addi_values[i]); + } + } + }; + auto emit_with_slli_addi = [&](XRegister rd, int64_t value) { + emit_with_slli_addi_helper(rd, value, addi, addiw, slli, lui); + }; + auto count_instructions_with_slli_addi = [&](int64_t value) { + size_t num_instructions = 0u; + auto count_rri = [&](XRegister, XRegister, int32_t) { ++num_instructions; }; + auto count_ru = [&](XRegister, uint32_t) { ++num_instructions; }; + emit_with_slli_addi_helper(Zero, value, count_rri, count_rri, count_rri, count_ru); + return num_instructions; + }; + + size_t insns_needed = count_instructions_with_slli_addi(imm); + size_t trailing_slli_shamt = 0u; + if (insns_needed > 2u) { + // Sometimes it's better to end with a SLLI even when the above code would end with ADDI. + if ((imm & 1) == 0 && (imm & 0xfff) != 0) { + int64_t value = imm >> CTZ(imm); + size_t new_insns_needed = count_instructions_with_slli_addi(value) + /*SLLI*/ 1u; + DCHECK_GT(new_insns_needed, 2u); + if (insns_needed > new_insns_needed) { + insns_needed = new_insns_needed; + trailing_slli_shamt = CTZ(imm); + } + } + + // Sometimes we can emit a shorter sequence that ends with SRLI. + if (imm > 0) { + size_t shamt = CLZ(static_cast<uint64_t>(imm)); + DCHECK_LE(shamt, 32u); // Otherwise we would not get here as `insns_needed` would be <= 2. 
+ if (imm == dchecked_integral_cast<int64_t>(MaxInt<uint64_t>(64 - shamt))) { + Addi(rd, Zero, -1); + Srli(rd, rd, shamt); + return; + } + + int64_t value = static_cast<int64_t>(static_cast<uint64_t>(imm) << shamt); + DCHECK_LT(value, 0); + if (is_simple_li_value(value)){ + size_t new_insns_needed = count_simple_li_instructions(value) + /*SRLI*/ 1u; + // In case of equal number of instructions, clang prefers the sequence without SRLI. + if (new_insns_needed < insns_needed) { + // If we emit ADDI, we set low bits that shall be shifted out to one in line with clang, + // effectively choosing to emit the negative constant closest to zero. + int32_t shifted_out = dchecked_integral_cast<int32_t>(MaxInt<uint32_t>(shamt)); + DCHECK_EQ(value & shifted_out, 0); + emit_simple_li(rd, (value & 0xfff) == 0 ? value : value + shifted_out); + Srli(rd, rd, shamt); + return; + } + } + + size_t ctz = CTZ(static_cast<uint64_t>(value)); + if (IsInt(ctz + 20, value)) { + size_t new_insns_needed = /*ADDI or LUI*/ 1u + /*SLLI*/ 1u + /*SRLI*/ 1u; + if (new_insns_needed < insns_needed) { + // Clang prefers ADDI+SLLI+SRLI over LUI+SLLI+SRLI. + if (IsInt(ctz + 12, value)) { + Addi(rd, Zero, value >> ctz); + Slli(rd, rd, ctz); + } else { + Lui(rd, (static_cast<uint64_t>(value) >> ctz) & 0xfffffu); + Slli(rd, rd, ctz - 12); + } + Srli(rd, rd, shamt); + return; + } + } + } + + // If we can use a scratch register, try using it to emit a shorter sequence. Without a + // scratch reg, the sequence is up to 8 instructions, with a scratch reg only up to 6. + if (can_use_tmp) { + int64_t low = (imm & 0xffffffff) - ((imm & 0x80000000) << 1); + int64_t remainder = imm - low; + size_t slli_shamt = CTZ(remainder); + DCHECK_GE(slli_shamt, 32u); + int64_t high = remainder >> slli_shamt; + size_t new_insns_needed = + ((IsInt<20>(high) || (high & 0xfff) == 0u) ? 1u : 2u) + + count_simple_li_instructions(low) + + /*SLLI+ADD*/ 2u; + if (new_insns_needed < insns_needed) { + DCHECK_NE(low & 0xfffff000, 0); + XRegister tmp = srs.AllocateXRegister(); + if (IsInt<20>(high) && !IsInt<12>(high)) { + // Emit the signed 20-bit value with LUI and reduce the SLLI shamt by 12 to compensate. + Lui(rd, static_cast<uint32_t>(high & 0xfffff)); + slli_shamt -= 12; + } else { + emit_simple_li(rd, high); + } + emit_simple_li(tmp, low); + Slli(rd, rd, slli_shamt); + Add(rd, rd, tmp); + return; + } + } + } + emit_with_slli_addi(rd, trailing_slli_shamt != 0u ? imm >> trailing_slli_shamt : imm); + if (trailing_slli_shamt != 0u) { + Slli(rd, rd, trailing_slli_shamt); + } +} + +/////////////////////////////// RV64 VARIANTS extension end //////////// + +} // namespace riscv64 +} // namespace art diff --git a/compiler/utils/riscv64/assembler_riscv64.h b/compiler/utils/riscv64/assembler_riscv64.h new file mode 100644 index 0000000000..15f2518c87 --- /dev/null +++ b/compiler/utils/riscv64/assembler_riscv64.h @@ -0,0 +1,1178 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_ +#define ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_ + +#include <cstdint> +#include <string> +#include <utility> +#include <vector> + +#include "arch/riscv64/instruction_set_features_riscv64.h" +#include "base/arena_containers.h" +#include "base/enums.h" +#include "base/globals.h" +#include "base/macros.h" +#include "managed_register_riscv64.h" +#include "utils/assembler.h" +#include "utils/label.h" + +namespace art HIDDEN { +namespace riscv64 { + +class ScratchRegisterScope; + +static constexpr size_t kRiscv64HalfwordSize = 2; +static constexpr size_t kRiscv64WordSize = 4; +static constexpr size_t kRiscv64DoublewordSize = 8; +static constexpr size_t kRiscv64FloatRegSizeInBytes = 8; + +enum class FPRoundingMode : uint32_t { + kRNE = 0x0, // Round to Nearest, ties to Even + kRTZ = 0x1, // Round towards Zero + kRDN = 0x2, // Round Down (towards −Infinity) + kRUP = 0x3, // Round Up (towards +Infinity) + kRMM = 0x4, // Round to Nearest, ties to Max Magnitude + kDYN = 0x7, // Dynamic rounding mode + kDefault = kDYN, + // Some instructions never need to round even though the spec includes the RM field. + // To simplify testing, emit the RM as 0 by default for these instructions because that's what + // `clang` does and because the `llvm-objdump` fails to disassemble the other rounding modes. + kIgnored = 0 +}; + +enum class AqRl : uint32_t { + kNone = 0x0, + kRelease = 0x1, + kAcquire = 0x2, + kAqRl = kRelease | kAcquire +}; + +// the type for fence +enum FenceType { + kFenceNone = 0, + kFenceWrite = 1, + kFenceRead = 2, + kFenceOutput = 4, + kFenceInput = 8, + kFenceDefault = 0xf, +}; + +// Used to test the values returned by FClassS/FClassD. +enum FPClassMaskType { + kNegativeInfinity = 0x001, + kNegativeNormal = 0x002, + kNegativeSubnormal = 0x004, + kNegativeZero = 0x008, + kPositiveZero = 0x010, + kPositiveSubnormal = 0x020, + kPositiveNormal = 0x040, + kPositiveInfinity = 0x080, + kSignalingNaN = 0x100, + kQuietNaN = 0x200, +}; + +class Riscv64Label : public Label { + public: + Riscv64Label() : prev_branch_id_(kNoPrevBranchId) {} + + Riscv64Label(Riscv64Label&& src) noexcept + // NOLINTNEXTLINE - src.prev_branch_id_ is valid after the move + : Label(std::move(src)), prev_branch_id_(src.prev_branch_id_) {} + + private: + static constexpr uint32_t kNoPrevBranchId = std::numeric_limits<uint32_t>::max(); + + uint32_t prev_branch_id_; // To get distance from preceding branch, if any. + + friend class Riscv64Assembler; + DISALLOW_COPY_AND_ASSIGN(Riscv64Label); +}; + +// Assembler literal is a value embedded in code, retrieved using a PC-relative load. +class Literal { + public: + static constexpr size_t kMaxSize = 8; + + Literal(uint32_t size, const uint8_t* data) : label_(), size_(size) { + DCHECK_LE(size, Literal::kMaxSize); + memcpy(data_, data, size); + } + + template <typename T> + T GetValue() const { + DCHECK_EQ(size_, sizeof(T)); + T value; + memcpy(&value, data_, sizeof(T)); + return value; + } + + uint32_t GetSize() const { return size_; } + + const uint8_t* GetData() const { return data_; } + + Riscv64Label* GetLabel() { return &label_; } + + const Riscv64Label* GetLabel() const { return &label_; } + + private: + Riscv64Label label_; + const uint32_t size_; + uint8_t data_[kMaxSize]; + + DISALLOW_COPY_AND_ASSIGN(Literal); +}; + +// Jump table: table of labels emitted after the code and before the literals. Similar to literals. 
+class JumpTable { + public: + explicit JumpTable(ArenaVector<Riscv64Label*>&& labels) : label_(), labels_(std::move(labels)) {} + + size_t GetSize() const { return labels_.size() * sizeof(int32_t); } + + const ArenaVector<Riscv64Label*>& GetData() const { return labels_; } + + Riscv64Label* GetLabel() { return &label_; } + + const Riscv64Label* GetLabel() const { return &label_; } + + private: + Riscv64Label label_; + ArenaVector<Riscv64Label*> labels_; + + DISALLOW_COPY_AND_ASSIGN(JumpTable); +}; + +class Riscv64Assembler final : public Assembler { + public: + explicit Riscv64Assembler(ArenaAllocator* allocator, + const Riscv64InstructionSetFeatures* instruction_set_features = nullptr) + : Assembler(allocator), + branches_(allocator->Adapter(kArenaAllocAssembler)), + finalized_(false), + overwriting_(false), + overwrite_location_(0), + literals_(allocator->Adapter(kArenaAllocAssembler)), + long_literals_(allocator->Adapter(kArenaAllocAssembler)), + jump_tables_(allocator->Adapter(kArenaAllocAssembler)), + last_position_adjustment_(0), + last_old_position_(0), + last_branch_id_(0), + available_scratch_core_registers_((1u << TMP) | (1u << TMP2)), + available_scratch_fp_registers_(1u << FTMP) { + UNUSED(instruction_set_features); + cfi().DelayEmittingAdvancePCs(); + } + + virtual ~Riscv64Assembler() { + for (auto& branch : branches_) { + CHECK(branch.IsResolved()); + } + } + + size_t CodeSize() const override { return Assembler::CodeSize(); } + DebugFrameOpCodeWriterForAssembler& cfi() { return Assembler::cfi(); } + + // According to "The RISC-V Instruction Set Manual" + + // LUI/AUIPC (RV32I, with sign-extension on RV64I), opcode = 0x17, 0x37 + // Note: These take a 20-bit unsigned value to align with the clang assembler for testing, + // but the value stored in the register shall actually be sign-extended to 64 bits. 
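+  // For example, `Lui(A0, 0x80000)` leaves 0xffffffff80000000 (not 0x80000000) in A0.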
+ void Lui(XRegister rd, uint32_t imm20); + void Auipc(XRegister rd, uint32_t imm20); + + // Jump instructions (RV32I), opcode = 0x67, 0x6f + void Jal(XRegister rd, int32_t offset); + void Jalr(XRegister rd, XRegister rs1, int32_t offset); + + // Branch instructions (RV32I), opcode = 0x63, funct3 from 0x0 ~ 0x1 and 0x4 ~ 0x7 + void Beq(XRegister rs1, XRegister rs2, int32_t offset); + void Bne(XRegister rs1, XRegister rs2, int32_t offset); + void Blt(XRegister rs1, XRegister rs2, int32_t offset); + void Bge(XRegister rs1, XRegister rs2, int32_t offset); + void Bltu(XRegister rs1, XRegister rs2, int32_t offset); + void Bgeu(XRegister rs1, XRegister rs2, int32_t offset); + + // Load instructions (RV32I+RV64I): opcode = 0x03, funct3 from 0x0 ~ 0x6 + void Lb(XRegister rd, XRegister rs1, int32_t offset); + void Lh(XRegister rd, XRegister rs1, int32_t offset); + void Lw(XRegister rd, XRegister rs1, int32_t offset); + void Ld(XRegister rd, XRegister rs1, int32_t offset); + void Lbu(XRegister rd, XRegister rs1, int32_t offset); + void Lhu(XRegister rd, XRegister rs1, int32_t offset); + void Lwu(XRegister rd, XRegister rs1, int32_t offset); + + // Store instructions (RV32I+RV64I): opcode = 0x23, funct3 from 0x0 ~ 0x3 + void Sb(XRegister rs2, XRegister rs1, int32_t offset); + void Sh(XRegister rs2, XRegister rs1, int32_t offset); + void Sw(XRegister rs2, XRegister rs1, int32_t offset); + void Sd(XRegister rs2, XRegister rs1, int32_t offset); + + // IMM ALU instructions (RV32I): opcode = 0x13, funct3 from 0x0 ~ 0x7 + void Addi(XRegister rd, XRegister rs1, int32_t imm12); + void Slti(XRegister rd, XRegister rs1, int32_t imm12); + void Sltiu(XRegister rd, XRegister rs1, int32_t imm12); + void Xori(XRegister rd, XRegister rs1, int32_t imm12); + void Ori(XRegister rd, XRegister rs1, int32_t imm12); + void Andi(XRegister rd, XRegister rs1, int32_t imm12); + void Slli(XRegister rd, XRegister rs1, int32_t shamt); + void Srli(XRegister rd, XRegister rs1, int32_t shamt); + void Srai(XRegister rd, XRegister rs1, int32_t shamt); + + // ALU instructions (RV32I): opcode = 0x33, funct3 from 0x0 ~ 0x7 + void Add(XRegister rd, XRegister rs1, XRegister rs2); + void Sub(XRegister rd, XRegister rs1, XRegister rs2); + void Slt(XRegister rd, XRegister rs1, XRegister rs2); + void Sltu(XRegister rd, XRegister rs1, XRegister rs2); + void Xor(XRegister rd, XRegister rs1, XRegister rs2); + void Or(XRegister rd, XRegister rs1, XRegister rs2); + void And(XRegister rd, XRegister rs1, XRegister rs2); + void Sll(XRegister rd, XRegister rs1, XRegister rs2); + void Srl(XRegister rd, XRegister rs1, XRegister rs2); + void Sra(XRegister rd, XRegister rs1, XRegister rs2); + + // 32bit Imm ALU instructions (RV64I): opcode = 0x1b, funct3 from 0x0, 0x1, 0x5 + void Addiw(XRegister rd, XRegister rs1, int32_t imm12); + void Slliw(XRegister rd, XRegister rs1, int32_t shamt); + void Srliw(XRegister rd, XRegister rs1, int32_t shamt); + void Sraiw(XRegister rd, XRegister rs1, int32_t shamt); + + // 32bit ALU instructions (RV64I): opcode = 0x3b, funct3 from 0x0 ~ 0x7 + void Addw(XRegister rd, XRegister rs1, XRegister rs2); + void Subw(XRegister rd, XRegister rs1, XRegister rs2); + void Sllw(XRegister rd, XRegister rs1, XRegister rs2); + void Srlw(XRegister rd, XRegister rs1, XRegister rs2); + void Sraw(XRegister rd, XRegister rs1, XRegister rs2); + + // Environment call and breakpoint (RV32I), opcode = 0x73 + void Ecall(); + void Ebreak(); + + // Fence instruction (RV32I): opcode = 0xf, funct3 = 0 + void Fence(uint32_t pred = kFenceDefault, 
uint32_t succ = kFenceDefault); + void FenceTso(); + + // "Zifencei" Standard Extension, opcode = 0xf, funct3 = 1 + void FenceI(); + + // RV32M Standard Extension: opcode = 0x33, funct3 from 0x0 ~ 0x7 + void Mul(XRegister rd, XRegister rs1, XRegister rs2); + void Mulh(XRegister rd, XRegister rs1, XRegister rs2); + void Mulhsu(XRegister rd, XRegister rs1, XRegister rs2); + void Mulhu(XRegister rd, XRegister rs1, XRegister rs2); + void Div(XRegister rd, XRegister rs1, XRegister rs2); + void Divu(XRegister rd, XRegister rs1, XRegister rs2); + void Rem(XRegister rd, XRegister rs1, XRegister rs2); + void Remu(XRegister rd, XRegister rs1, XRegister rs2); + + // RV64M Standard Extension: opcode = 0x3b, funct3 0x0 and from 0x4 ~ 0x7 + void Mulw(XRegister rd, XRegister rs1, XRegister rs2); + void Divw(XRegister rd, XRegister rs1, XRegister rs2); + void Divuw(XRegister rd, XRegister rs1, XRegister rs2); + void Remw(XRegister rd, XRegister rs1, XRegister rs2); + void Remuw(XRegister rd, XRegister rs1, XRegister rs2); + + // RV32A/RV64A Standard Extension + void LrW(XRegister rd, XRegister rs1, AqRl aqrl); + void LrD(XRegister rd, XRegister rs1, AqRl aqrl); + void ScW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void ScD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoSwapW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoSwapD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoAddW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoAddD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoXorW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoXorD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoAndW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoAndD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoOrW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoOrD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMinW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMinD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMaxW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMaxD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMinuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMinuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMaxuW(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + void AmoMaxuD(XRegister rd, XRegister rs2, XRegister rs1, AqRl aqrl); + + // "Zicsr" Standard Extension, opcode = 0x73, funct3 from 0x1 ~ 0x3 and 0x5 ~ 0x7 + void Csrrw(XRegister rd, uint32_t csr, XRegister rs1); + void Csrrs(XRegister rd, uint32_t csr, XRegister rs1); + void Csrrc(XRegister rd, uint32_t csr, XRegister rs1); + void Csrrwi(XRegister rd, uint32_t csr, uint32_t uimm5); + void Csrrsi(XRegister rd, uint32_t csr, uint32_t uimm5); + void Csrrci(XRegister rd, uint32_t csr, uint32_t uimm5); + + // FP load/store instructions (RV32F+RV32D): opcode = 0x07, 0x27 + void FLw(FRegister rd, XRegister rs1, int32_t offset); + void FLd(FRegister rd, XRegister rs1, int32_t offset); + void FSw(FRegister rs2, XRegister rs1, int32_t offset); + void FSd(FRegister rs2, XRegister rs1, int32_t offset); + + // FP FMA instructions (RV32F+RV32D): opcode = 0x43, 0x47, 0x4b, 0x4f + void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, 
FPRoundingMode frm); + void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3, FPRoundingMode frm); + + // FP FMA instruction helpers passing the default rounding mode. + void FMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FNMSubS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FNMSubS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FNMSubD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FNMSubD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FNMAddS(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FNMAddS(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + void FNMAddD(FRegister rd, FRegister rs1, FRegister rs2, FRegister rs3) { + FNMAddD(rd, rs1, rs2, rs3, FPRoundingMode::kDefault); + } + + // Simple FP instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b0XXXX0D + void FAddS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FAddD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FSubS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FSubD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FMulS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FMulD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FDivS(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FDivD(FRegister rd, FRegister rs1, FRegister rs2, FPRoundingMode frm); + void FSqrtS(FRegister rd, FRegister rs1, FPRoundingMode frm); + void FSqrtD(FRegister rd, FRegister rs1, FPRoundingMode frm); + void FSgnjS(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjD(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjnS(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjnD(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjxS(FRegister rd, FRegister rs1, FRegister rs2); + void FSgnjxD(FRegister rd, FRegister rs1, FRegister rs2); + void FMinS(FRegister rd, FRegister rs1, FRegister rs2); + void FMinD(FRegister rd, FRegister rs1, FRegister rs2); + void FMaxS(FRegister rd, FRegister rs1, FRegister rs2); + void FMaxD(FRegister rd, FRegister rs1, FRegister rs2); + void FCvtSD(FRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtDS(FRegister rd, FRegister rs1, FPRoundingMode frm); + + // Simple FP instruction helpers passing the default rounding mode. 
+ void FAddS(FRegister rd, FRegister rs1, FRegister rs2) { + FAddS(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FAddD(FRegister rd, FRegister rs1, FRegister rs2) { + FAddD(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FSubS(FRegister rd, FRegister rs1, FRegister rs2) { + FSubS(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FSubD(FRegister rd, FRegister rs1, FRegister rs2) { + FSubD(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FMulS(FRegister rd, FRegister rs1, FRegister rs2) { + FMulS(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FMulD(FRegister rd, FRegister rs1, FRegister rs2) { + FMulD(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FDivS(FRegister rd, FRegister rs1, FRegister rs2) { + FDivS(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FDivD(FRegister rd, FRegister rs1, FRegister rs2) { + FDivD(rd, rs1, rs2, FPRoundingMode::kDefault); + } + void FSqrtS(FRegister rd, FRegister rs1) { + FSqrtS(rd, rs1, FPRoundingMode::kDefault); + } + void FSqrtD(FRegister rd, FRegister rs1) { + FSqrtD(rd, rs1, FPRoundingMode::kDefault); + } + void FCvtSD(FRegister rd, FRegister rs1) { + FCvtSD(rd, rs1, FPRoundingMode::kDefault); + } + void FCvtDS(FRegister rd, FRegister rs1) { + FCvtDS(rd, rs1, FPRoundingMode::kIgnored); + } + + // FP compare instructions (RV32F+RV32D): opcode = 0x53, funct7 = 0b101000D + void FEqS(XRegister rd, FRegister rs1, FRegister rs2); + void FEqD(XRegister rd, FRegister rs1, FRegister rs2); + void FLtS(XRegister rd, FRegister rs1, FRegister rs2); + void FLtD(XRegister rd, FRegister rs1, FRegister rs2); + void FLeS(XRegister rd, FRegister rs1, FRegister rs2); + void FLeD(XRegister rd, FRegister rs1, FRegister rs2); + + // FP conversion instructions (RV32F+RV32D+RV64F+RV64D): opcode = 0x53, funct7 = 0b110X00D + void FCvtWS(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtWD(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtWuS(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtWuD(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtLS(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtLD(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtLuS(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtLuD(XRegister rd, FRegister rs1, FPRoundingMode frm); + void FCvtSW(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtDW(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtSWu(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtDWu(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtSL(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtDL(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtSLu(FRegister rd, XRegister rs1, FPRoundingMode frm); + void FCvtDLu(FRegister rd, XRegister rs1, FPRoundingMode frm); + + // FP conversion instruction helpers passing the default rounding mode. 
+ void FCvtWS(XRegister rd, FRegister rs1) { FCvtWS(rd, rs1, FPRoundingMode::kDefault); } + void FCvtWD(XRegister rd, FRegister rs1) { FCvtWD(rd, rs1, FPRoundingMode::kDefault); } + void FCvtWuS(XRegister rd, FRegister rs1) { FCvtWuS(rd, rs1, FPRoundingMode::kDefault); } + void FCvtWuD(XRegister rd, FRegister rs1) { FCvtWuD(rd, rs1, FPRoundingMode::kDefault); } + void FCvtLS(XRegister rd, FRegister rs1) { FCvtLS(rd, rs1, FPRoundingMode::kDefault); } + void FCvtLD(XRegister rd, FRegister rs1) { FCvtLD(rd, rs1, FPRoundingMode::kDefault); } + void FCvtLuS(XRegister rd, FRegister rs1) { FCvtLuS(rd, rs1, FPRoundingMode::kDefault); } + void FCvtLuD(XRegister rd, FRegister rs1) { FCvtLuD(rd, rs1, FPRoundingMode::kDefault); } + void FCvtSW(FRegister rd, XRegister rs1) { FCvtSW(rd, rs1, FPRoundingMode::kDefault); } + void FCvtDW(FRegister rd, XRegister rs1) { FCvtDW(rd, rs1, FPRoundingMode::kIgnored); } + void FCvtSWu(FRegister rd, XRegister rs1) { FCvtSWu(rd, rs1, FPRoundingMode::kDefault); } + void FCvtDWu(FRegister rd, XRegister rs1) { FCvtDWu(rd, rs1, FPRoundingMode::kIgnored); } + void FCvtSL(FRegister rd, XRegister rs1) { FCvtSL(rd, rs1, FPRoundingMode::kDefault); } + void FCvtDL(FRegister rd, XRegister rs1) { FCvtDL(rd, rs1, FPRoundingMode::kDefault); } + void FCvtSLu(FRegister rd, XRegister rs1) { FCvtSLu(rd, rs1, FPRoundingMode::kDefault); } + void FCvtDLu(FRegister rd, XRegister rs1) { FCvtDLu(rd, rs1, FPRoundingMode::kDefault); } + + // FP move instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x0, funct7 = 0b111X00D + void FMvXW(XRegister rd, FRegister rs1); + void FMvXD(XRegister rd, FRegister rs1); + void FMvWX(FRegister rd, XRegister rs1); + void FMvDX(FRegister rd, XRegister rs1); + + // FP classify instructions (RV32F+RV32D): opcode = 0x53, funct3 = 0x1, funct7 = 0b111X00D + void FClassS(XRegister rd, FRegister rs1); + void FClassD(XRegister rd, FRegister rs1); + + // "Zba" Standard Extension, opcode = 0x1b, 0x33 or 0x3b, funct3 and funct7 varies. + void AddUw(XRegister rd, XRegister rs1, XRegister rs2); + void Sh1Add(XRegister rd, XRegister rs1, XRegister rs2); + void Sh1AddUw(XRegister rd, XRegister rs1, XRegister rs2); + void Sh2Add(XRegister rd, XRegister rs1, XRegister rs2); + void Sh2AddUw(XRegister rd, XRegister rs1, XRegister rs2); + void Sh3Add(XRegister rd, XRegister rs1, XRegister rs2); + void Sh3AddUw(XRegister rd, XRegister rs1, XRegister rs2); + void SlliUw(XRegister rd, XRegister rs1, int32_t shamt); + + // "Zbb" Standard Extension, opcode = 0x13, 0x1b or 0x33, funct3 and funct7 varies. + // Note: We do not support 32-bit sext.b, sext.h and zext.h from the Zbb extension. + // (Neither does the clang-r498229's assembler which we currently test against.) 
+ void Andn(XRegister rd, XRegister rs1, XRegister rs2); + void Orn(XRegister rd, XRegister rs1, XRegister rs2); + void Xnor(XRegister rd, XRegister rs1, XRegister rs2); + void Clz(XRegister rd, XRegister rs1); + void Clzw(XRegister rd, XRegister rs1); + void Ctz(XRegister rd, XRegister rs1); + void Ctzw(XRegister rd, XRegister rs1); + void Cpop(XRegister rd, XRegister rs1); + void Cpopw(XRegister rd, XRegister rs1); + void Min(XRegister rd, XRegister rs1, XRegister rs2); + void Minu(XRegister rd, XRegister rs1, XRegister rs2); + void Max(XRegister rd, XRegister rs1, XRegister rs2); + void Maxu(XRegister rd, XRegister rs1, XRegister rs2); + void Rol(XRegister rd, XRegister rs1, XRegister rs2); + void Rolw(XRegister rd, XRegister rs1, XRegister rs2); + void Ror(XRegister rd, XRegister rs1, XRegister rs2); + void Rorw(XRegister rd, XRegister rs1, XRegister rs2); + void Rori(XRegister rd, XRegister rs1, int32_t shamt); + void Roriw(XRegister rd, XRegister rs1, int32_t shamt); + void OrcB(XRegister rd, XRegister rs1); + void Rev8(XRegister rd, XRegister rs1); + + ////////////////////////////// RV64 MACRO Instructions START /////////////////////////////// + // These pseudo instructions are from "RISC-V Assembly Programmer's Manual". + + void Nop(); + void Li(XRegister rd, int64_t imm); + void Mv(XRegister rd, XRegister rs); + void Not(XRegister rd, XRegister rs); + void Neg(XRegister rd, XRegister rs); + void NegW(XRegister rd, XRegister rs); + void SextB(XRegister rd, XRegister rs); + void SextH(XRegister rd, XRegister rs); + void SextW(XRegister rd, XRegister rs); + void ZextB(XRegister rd, XRegister rs); + void ZextH(XRegister rd, XRegister rs); + void ZextW(XRegister rd, XRegister rs); + void Seqz(XRegister rd, XRegister rs); + void Snez(XRegister rd, XRegister rs); + void Sltz(XRegister rd, XRegister rs); + void Sgtz(XRegister rd, XRegister rs); + void FMvS(FRegister rd, FRegister rs); + void FAbsS(FRegister rd, FRegister rs); + void FNegS(FRegister rd, FRegister rs); + void FMvD(FRegister rd, FRegister rs); + void FAbsD(FRegister rd, FRegister rs); + void FNegD(FRegister rd, FRegister rs); + + // Branch pseudo instructions + void Beqz(XRegister rs, int32_t offset); + void Bnez(XRegister rs, int32_t offset); + void Blez(XRegister rs, int32_t offset); + void Bgez(XRegister rs, int32_t offset); + void Bltz(XRegister rs, int32_t offset); + void Bgtz(XRegister rs, int32_t offset); + void Bgt(XRegister rs, XRegister rt, int32_t offset); + void Ble(XRegister rs, XRegister rt, int32_t offset); + void Bgtu(XRegister rs, XRegister rt, int32_t offset); + void Bleu(XRegister rs, XRegister rt, int32_t offset); + + // Jump pseudo instructions + void J(int32_t offset); + void Jal(int32_t offset); + void Jr(XRegister rs); + void Jalr(XRegister rs); + void Jalr(XRegister rd, XRegister rs); + void Ret(); + + // Pseudo instructions for accessing control and status registers + void RdCycle(XRegister rd); + void RdTime(XRegister rd); + void RdInstret(XRegister rd); + void Csrr(XRegister rd, uint32_t csr); + void Csrw(uint32_t csr, XRegister rs); + void Csrs(uint32_t csr, XRegister rs); + void Csrc(uint32_t csr, XRegister rs); + void Csrwi(uint32_t csr, uint32_t uimm5); + void Csrsi(uint32_t csr, uint32_t uimm5); + void Csrci(uint32_t csr, uint32_t uimm5); + + // Load/store macros for arbitrary 32-bit offsets. 
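+  // For example, `Loadw(A0, A1, 0x12345)` cannot be encoded as a single LW, so it expands
+  // roughly to LUI+ADD into a scratch register followed by LW with the remaining low 12 bits
+  // (see `AdjustBaseAndOffset()`).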
+ void Loadb(XRegister rd, XRegister rs1, int32_t offset); + void Loadh(XRegister rd, XRegister rs1, int32_t offset); + void Loadw(XRegister rd, XRegister rs1, int32_t offset); + void Loadd(XRegister rd, XRegister rs1, int32_t offset); + void Loadbu(XRegister rd, XRegister rs1, int32_t offset); + void Loadhu(XRegister rd, XRegister rs1, int32_t offset); + void Loadwu(XRegister rd, XRegister rs1, int32_t offset); + void Storeb(XRegister rs2, XRegister rs1, int32_t offset); + void Storeh(XRegister rs2, XRegister rs1, int32_t offset); + void Storew(XRegister rs2, XRegister rs1, int32_t offset); + void Stored(XRegister rs2, XRegister rs1, int32_t offset); + void FLoadw(FRegister rd, XRegister rs1, int32_t offset); + void FLoadd(FRegister rd, XRegister rs1, int32_t offset); + void FStorew(FRegister rs2, XRegister rs1, int32_t offset); + void FStored(FRegister rs2, XRegister rs1, int32_t offset); + + // Macros for loading constants. + void LoadConst32(XRegister rd, int32_t value); + void LoadConst64(XRegister rd, int64_t value); + + // Macros for adding constants. + void AddConst32(XRegister rd, XRegister rs1, int32_t value); + void AddConst64(XRegister rd, XRegister rs1, int64_t value); + + // Jumps and branches to a label. + void Beqz(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bnez(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Blez(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bgez(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bltz(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Bgtz(XRegister rs, Riscv64Label* label, bool is_bare = false); + void Beq(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bne(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Ble(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bge(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Blt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bgt(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bleu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bgeu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bltu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Bgtu(XRegister rs, XRegister rt, Riscv64Label* label, bool is_bare = false); + void Jal(XRegister rd, Riscv64Label* label, bool is_bare = false); + void J(Riscv64Label* label, bool is_bare = false); + void Jal(Riscv64Label* label, bool is_bare = false); + + // Literal load. + void Loadw(XRegister rd, Literal* literal); + void Loadwu(XRegister rd, Literal* literal); + void Loadd(XRegister rd, Literal* literal); + void FLoadw(FRegister rd, Literal* literal); + void FLoadd(FRegister rd, Literal* literal); + + // Illegal instruction that triggers SIGILL. + void Unimp(); + + /////////////////////////////// RV64 MACRO Instructions END /////////////////////////////// + + void Bind(Label* label) override { Bind(down_cast<Riscv64Label*>(label)); } + + void Jump([[maybe_unused]] Label* label) override { + UNIMPLEMENTED(FATAL) << "Do not use Jump for RISCV64"; + } + + void Bind(Riscv64Label* label); + + // Load label address using PC-relative loads. + void LoadLabelAddress(XRegister rd, Riscv64Label* label); + + // Create a new literal with a given value. 
+ // NOTE:Use `Identity<>` to force the template parameter to be explicitly specified. + template <typename T> + Literal* NewLiteral(typename Identity<T>::type value) { + static_assert(std::is_integral<T>::value, "T must be an integral type."); + return NewLiteral(sizeof(value), reinterpret_cast<const uint8_t*>(&value)); + } + + // Create a new literal with the given data. + Literal* NewLiteral(size_t size, const uint8_t* data); + + // Create a jump table for the given labels that will be emitted when finalizing. + // When the table is emitted, offsets will be relative to the location of the table. + // The table location is determined by the location of its label (the label precedes + // the table data) and should be loaded using LoadLabelAddress(). + JumpTable* CreateJumpTable(ArenaVector<Riscv64Label*>&& labels); + + public: + // Emit slow paths queued during assembly, promote short branches to long if needed, + // and emit branches. + void FinalizeCode() override; + + // Returns the current location of a label. + // + // This function must be used instead of `Riscv64Label::GetPosition()` + // which returns assembler's internal data instead of an actual location. + // + // The location can change during branch fixup in `FinalizeCode()`. Before that, + // the location is not final and therefore not very useful to external users, + // so they should preferably retrieve the location only after `FinalizeCode()`. + uint32_t GetLabelLocation(const Riscv64Label* label) const; + + // Get the final position of a label after local fixup based on the old position + // recorded before FinalizeCode(). + uint32_t GetAdjustedPosition(uint32_t old_position); + + private: + enum BranchCondition : uint8_t { + kCondEQ, + kCondNE, + kCondLT, + kCondGE, + kCondLE, + kCondGT, + kCondLTU, + kCondGEU, + kCondLEU, + kCondGTU, + kUncond, + }; + + // Note that PC-relative literal loads are handled as pseudo branches because they need + // to be emitted after branch relocation to use correct offsets. + class Branch { + public: + enum Type : uint8_t { + // TODO(riscv64): Support 16-bit instructions ("C" Standard Extension). + + // Short branches (can be promoted to longer). + kCondBranch, + kUncondBranch, + kCall, + // Short branches (can't be promoted to longer). + // TODO(riscv64): Do we need these (untested) bare branches, or can we remove them? + kBareCondBranch, + kBareUncondBranch, + kBareCall, + + // Medium branch (can be promoted to long). + kCondBranch21, + + // Long branches. + kLongCondBranch, + kLongUncondBranch, + kLongCall, + + // Label. + kLabel, + + // Literals. + kLiteral, + kLiteralUnsigned, + kLiteralLong, + kLiteralFloat, + kLiteralDouble, + }; + + // Bit sizes of offsets defined as enums to minimize chance of typos. + enum OffsetBits { + kOffset13 = 13, + kOffset21 = 21, + kOffset32 = 32, + }; + + static constexpr uint32_t kUnresolved = 0xffffffff; // Unresolved target_ + static constexpr uint32_t kMaxBranchLength = 12; // In bytes. + + struct BranchInfo { + // Branch length in bytes. + uint32_t length; + // The offset in bytes of the PC used in the (only) PC-relative instruction from + // the start of the branch sequence. RISC-V always uses the address of the PC-relative + // instruction as the PC, so this is essentially the offset of that instruction. + uint32_t pc_offset; + // How large (in bits) a PC-relative offset can be for a given type of branch. + OffsetBits offset_size; + }; + static const BranchInfo branch_info_[/* Type */]; + + // Unconditional branch or call. 
+ Branch(uint32_t location, uint32_t target, XRegister rd, bool is_bare); + // Conditional branch. + Branch(uint32_t location, + uint32_t target, + BranchCondition condition, + XRegister lhs_reg, + XRegister rhs_reg, + bool is_bare); + // Label address or literal. + Branch(uint32_t location, uint32_t target, XRegister rd, Type label_or_literal_type); + Branch(uint32_t location, uint32_t target, FRegister rd, Type literal_type); + + // Some conditional branches with lhs = rhs are effectively NOPs, while some + // others are effectively unconditional. + static bool IsNop(BranchCondition condition, XRegister lhs, XRegister rhs); + static bool IsUncond(BranchCondition condition, XRegister lhs, XRegister rhs); + + static BranchCondition OppositeCondition(BranchCondition cond); + + Type GetType() const; + BranchCondition GetCondition() const; + XRegister GetLeftRegister() const; + XRegister GetRightRegister() const; + FRegister GetFRegister() const; + uint32_t GetTarget() const; + uint32_t GetLocation() const; + uint32_t GetOldLocation() const; + uint32_t GetLength() const; + uint32_t GetOldLength() const; + uint32_t GetEndLocation() const; + uint32_t GetOldEndLocation() const; + bool IsBare() const; + bool IsResolved() const; + + // Returns the bit size of the signed offset that the branch instruction can handle. + OffsetBits GetOffsetSize() const; + + // Calculates the distance between two byte locations in the assembler buffer and + // returns the number of bits needed to represent the distance as a signed integer. + static OffsetBits GetOffsetSizeNeeded(uint32_t location, uint32_t target); + + // Resolve a branch when the target is known. + void Resolve(uint32_t target); + + // Relocate a branch by a given delta if needed due to expansion of this or another + // branch at a given location by this delta (just changes location_ and target_). + void Relocate(uint32_t expand_location, uint32_t delta); + + // If necessary, updates the type by promoting a short branch to a longer branch + // based on the branch location and target. Returns the amount (in bytes) by + // which the branch size has increased. + uint32_t PromoteIfNeeded(); + + // Returns the offset into assembler buffer that shall be used as the base PC for + // offset calculation. RISC-V always uses the address of the PC-relative instruction + // as the PC, so this is essentially the location of that instruction. + uint32_t GetOffsetLocation() const; + + // Calculates and returns the offset ready for encoding in the branch instruction(s). + int32_t GetOffset() const; + + private: + // Completes branch construction by determining and recording its type. + void InitializeType(Type initial_type); + // Helper for the above. + void InitShortOrLong(OffsetBits ofs_size, Type short_type, Type long_type, Type longest_type); + + uint32_t old_location_; // Offset into assembler buffer in bytes. + uint32_t location_; // Offset into assembler buffer in bytes. + uint32_t target_; // Offset into assembler buffer in bytes. + + XRegister lhs_reg_; // Left-hand side register in conditional branches or + // destination register in calls or literals. + XRegister rhs_reg_; // Right-hand side register in conditional branches. + FRegister freg_; // Destination register in FP literals. + BranchCondition condition_; // Condition for conditional branches. + + Type type_; // Current type of the branch. + Type old_type_; // Initial type of the branch. + }; + + // Branch and literal fixup. 
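+ //
+ // For example (illustrative): a `Beq(A0, A1, &label)` whose target stays within the 13-bit
+ // B-type range (about ±4 KiB) is emitted as a single `beq`; a farther target promotes it to
+ // an opposite-condition branch over a `j` (21-bit range, about ±1 MiB), and an even farther
+ // one to an opposite-condition branch over an AUIPC+JALR pair.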
+ + void EmitBcond(BranchCondition cond, XRegister rs, XRegister rt, int32_t offset); + void EmitBranch(Branch* branch); + void EmitBranches(); + void EmitJumpTables(); + void EmitLiterals(); + + void FinalizeLabeledBranch(Riscv64Label* label); + void Bcond(Riscv64Label* label, + bool is_bare, + BranchCondition condition, + XRegister lhs, + XRegister rhs); + void Buncond(Riscv64Label* label, XRegister rd, bool is_bare); + template <typename XRegisterOrFRegister> + void LoadLiteral(Literal* literal, XRegisterOrFRegister rd, Branch::Type literal_type); + + Branch* GetBranch(uint32_t branch_id); + const Branch* GetBranch(uint32_t branch_id) const; + + void ReserveJumpTableSpace(); + void PromoteBranches(); + void PatchCFI(); + + // Emit data (e.g. encoded instruction or immediate) to the instruction stream. + void Emit(uint32_t value); + + // Adjust base register and offset if needed for load/store with a large offset. + void AdjustBaseAndOffset(XRegister& base, int32_t& offset, ScratchRegisterScope& srs); + + // Helper templates for loads/stores with 32-bit offsets. + template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> + void LoadFromOffset(XRegister rd, XRegister rs1, int32_t offset); + template <void (Riscv64Assembler::*insn)(XRegister, XRegister, int32_t)> + void StoreToOffset(XRegister rs2, XRegister rs1, int32_t offset); + template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> + void FLoadFromOffset(FRegister rd, XRegister rs1, int32_t offset); + template <void (Riscv64Assembler::*insn)(FRegister, XRegister, int32_t)> + void FStoreToOffset(FRegister rs2, XRegister rs1, int32_t offset); + + // Implementation helper for `Li()`, `LoadConst32()` and `LoadConst64()`. + void LoadImmediate(XRegister rd, int64_t imm, bool can_use_tmp); + + // Emit helpers. + + // I-type instruction: + // + // 31 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . . . . . . . | . . . . | . . | . . . . | . . . . . . ] + // [ imm11:0 rs1 funct3 rd opcode ] + // ----------------------------------------------------------------- + template <typename Reg1, typename Reg2> + void EmitI(int32_t imm12, Reg1 rs1, uint32_t funct3, Reg2 rd, uint32_t opcode) { + DCHECK(IsInt<12>(imm12)) << imm12; + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = static_cast<uint32_t>(imm12) << 20 | static_cast<uint32_t>(rs1) << 15 | + funct3 << 12 | static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + // R-type instruction: + // + // 31 25 24 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . 
] + // [ funct7 rs2 rs1 funct3 rd opcode ] + // ----------------------------------------------------------------- + template <typename Reg1, typename Reg2, typename Reg3> + void EmitR(uint32_t funct7, Reg1 rs2, Reg2 rs1, uint32_t funct3, Reg3 rd, uint32_t opcode) { + DCHECK(IsUint<7>(funct7)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs2))); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = funct7 << 25 | static_cast<uint32_t>(rs2) << 20 | + static_cast<uint32_t>(rs1) << 15 | funct3 << 12 | + static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + // R-type instruction variant for floating-point fused multiply-add/sub (F[N]MADD/ F[N]MSUB): + // + // 31 27 25 24 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . | . | . . . . | . . . . | . . | . . . . | . . . . . . ] + // [ rs3 fmt rs2 rs1 funct3 rd opcode ] + // ----------------------------------------------------------------- + template <typename Reg1, typename Reg2, typename Reg3, typename Reg4> + void EmitR4( + Reg1 rs3, uint32_t fmt, Reg2 rs2, Reg3 rs1, uint32_t funct3, Reg4 rd, uint32_t opcode) { + DCHECK(IsUint<5>(static_cast<uint32_t>(rs3))); + DCHECK(IsUint<2>(fmt)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs2))); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = static_cast<uint32_t>(rs3) << 27 | static_cast<uint32_t>(fmt) << 25 | + static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 | + static_cast<uint32_t>(funct3) << 12 | static_cast<uint32_t>(rd) << 7 | + opcode; + Emit(encoding); + } + + // S-type instruction: + // + // 31 25 24 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . . | . . . . | . . . . | . . | . . . . | . . . . . . ] + // [ imm11:5 rs2 rs1 funct3 imm4:0 opcode ] + // ----------------------------------------------------------------- + template <typename Reg1, typename Reg2> + void EmitS(int32_t imm12, Reg1 rs2, Reg2 rs1, uint32_t funct3, uint32_t opcode) { + DCHECK(IsInt<12>(imm12)) << imm12; + DCHECK(IsUint<5>(static_cast<uint32_t>(rs2))); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = (static_cast<uint32_t>(imm12) & 0xFE0) << 20 | + static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 | + static_cast<uint32_t>(funct3) << 12 | + (static_cast<uint32_t>(imm12) & 0x1F) << 7 | opcode; + Emit(encoding); + } + + // I-type instruction variant for shifts (SLLI / SRLI / SRAI): + // + // 31 26 25 20 19 15 14 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . | . . . . . | . . . . | . . | . . . . | . . . . . . 
] + // [ imm11:6 imm5:0(shamt) rs1 funct3 rd opcode ] + // ----------------------------------------------------------------- + void EmitI6(uint32_t funct6, + uint32_t imm6, + XRegister rs1, + uint32_t funct3, + XRegister rd, + uint32_t opcode) { + DCHECK(IsUint<6>(funct6)); + DCHECK(IsUint<6>(imm6)) << imm6; + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = funct6 << 26 | static_cast<uint32_t>(imm6) << 20 | + static_cast<uint32_t>(rs1) << 15 | funct3 << 12 | + static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + // B-type instruction: + // + // 31 30 25 24 20 19 15 14 12 11 8 7 6 0 + // ----------------------------------------------------------------- + // [ | . . . . . | . . . . | . . . . | . . | . . . | | . . . . . . ] + // imm12 imm11:5 rs2 rs1 funct3 imm4:1 imm11 opcode ] + // ----------------------------------------------------------------- + void EmitB(int32_t offset, XRegister rs2, XRegister rs1, uint32_t funct3, uint32_t opcode) { + DCHECK_ALIGNED(offset, 2); + DCHECK(IsInt<13>(offset)) << offset; + DCHECK(IsUint<5>(static_cast<uint32_t>(rs2))); + DCHECK(IsUint<5>(static_cast<uint32_t>(rs1))); + DCHECK(IsUint<3>(funct3)); + DCHECK(IsUint<7>(opcode)); + uint32_t imm12 = (static_cast<uint32_t>(offset) >> 1) & 0xfffu; + uint32_t encoding = (imm12 & 0x800u) << (31 - 11) | (imm12 & 0x03f0u) << (25 - 4) | + static_cast<uint32_t>(rs2) << 20 | static_cast<uint32_t>(rs1) << 15 | + static_cast<uint32_t>(funct3) << 12 | + (imm12 & 0xfu) << 8 | (imm12 & 0x400u) >> (10 - 7) | opcode; + Emit(encoding); + } + + // U-type instruction: + // + // 31 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ . . . . . . . . . . . . . . . . . . . | . . . . | . . . . . . ] + // [ imm31:12 rd opcode ] + // ----------------------------------------------------------------- + void EmitU(uint32_t imm20, XRegister rd, uint32_t opcode) { + CHECK(IsUint<20>(imm20)) << imm20; + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t encoding = imm20 << 12 | static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + // J-type instruction: + // + // 31 30 21 19 12 11 7 6 0 + // ----------------------------------------------------------------- + // [ | . . . . . . . . . | | . . . . . . . | . . . . | . . . . . . ] + // imm20 imm10:1 imm11 imm19:12 rd opcode ] + // ----------------------------------------------------------------- + void EmitJ(int32_t offset, XRegister rd, uint32_t opcode) { + DCHECK_ALIGNED(offset, 2); + CHECK(IsInt<21>(offset)) << offset; + DCHECK(IsUint<5>(static_cast<uint32_t>(rd))); + DCHECK(IsUint<7>(opcode)); + uint32_t imm20 = (static_cast<uint32_t>(offset) >> 1) & 0xfffffu; + uint32_t encoding = (imm20 & 0x80000u) << (31 - 19) | (imm20 & 0x03ffu) << 21 | + (imm20 & 0x400u) << (20 - 10) | (imm20 & 0x7f800u) << (12 - 11) | + static_cast<uint32_t>(rd) << 7 | opcode; + Emit(encoding); + } + + ArenaVector<Branch> branches_; + + // For checking that we finalize the code only once. + bool finalized_; + + // Whether appending instructions at the end of the buffer or overwriting the existing ones. + bool overwriting_; + // The current overwrite location. + uint32_t overwrite_location_; + + // Use `std::deque<>` for literal labels to allow insertions at the end + // without invalidating pointers and references to existing elements. 
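+ // (The `Literal*` returned by `NewLiteral()` is referenced by literal loads and by the
+ // branch/literal fixup until `FinalizeCode()`, so element addresses must remain stable.)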
+ ArenaDeque<Literal> literals_;
+ ArenaDeque<Literal> long_literals_; // 64-bit literals separated for alignment reasons.
+
+ // Jump table list.
+ ArenaDeque<JumpTable> jump_tables_;
+
+ // Data for `GetAdjustedPosition()`, see the description there.
+ uint32_t last_position_adjustment_;
+ uint32_t last_old_position_;
+ uint32_t last_branch_id_;
+
+ uint32_t available_scratch_core_registers_;
+ uint32_t available_scratch_fp_registers_;
+
+ static constexpr uint32_t kXlen = 64;
+
+ friend class ScratchRegisterScope;
+
+ DISALLOW_COPY_AND_ASSIGN(Riscv64Assembler);
+};
+
+class ScratchRegisterScope {
+ public:
+ explicit ScratchRegisterScope(Riscv64Assembler* assembler)
+ : assembler_(assembler),
+ old_available_scratch_core_registers_(assembler->available_scratch_core_registers_),
+ old_available_scratch_fp_registers_(assembler->available_scratch_fp_registers_) {}
+
+ ~ScratchRegisterScope() {
+ assembler_->available_scratch_core_registers_ = old_available_scratch_core_registers_;
+ assembler_->available_scratch_fp_registers_ = old_available_scratch_fp_registers_;
+ }
+
+ // Allocate a scratch `XRegister`. There must be an available register to allocate.
+ XRegister AllocateXRegister() {
+ CHECK_NE(assembler_->available_scratch_core_registers_, 0u);
+ // Allocate the highest available scratch register (prefer TMP(T6) over TMP2(T5)).
+ uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_core_registers_) - 1u) -
+ CLZ(assembler_->available_scratch_core_registers_);
+ assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ return enum_cast<XRegister>(reg_num);
+ }
+
+ // Free a previously unavailable core register for use as a scratch register.
+ // This can be an arbitrary register, not necessarily the usual `TMP` or `TMP2`.
+ void FreeXRegister(XRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ CHECK_EQ((1u << reg_num) & assembler_->available_scratch_core_registers_, 0u);
+ assembler_->available_scratch_core_registers_ |= 1u << reg_num;
+ }
+
+ // The number of available scratch core registers.
+ size_t AvailableXRegisters() {
+ return POPCOUNT(assembler_->available_scratch_core_registers_);
+ }
+
+ // Make sure a core register is available for use as a scratch register.
+ void IncludeXRegister(XRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ assembler_->available_scratch_core_registers_ |= 1u << reg_num;
+ }
+
+ // Make sure a core register is not available for use as a scratch register.
+ void ExcludeXRegister(XRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfXRegisters));
+ assembler_->available_scratch_core_registers_ &= ~(1u << reg_num);
+ }
+
+ // Allocate a scratch `FRegister`. There must be an available register to allocate.
+ FRegister AllocateFRegister() {
+ CHECK_NE(assembler_->available_scratch_fp_registers_, 0u);
+ // Allocate the highest available scratch register (same as for core registers).
+ uint32_t reg_num = (BitSizeOf(assembler_->available_scratch_fp_registers_) - 1u) -
+ CLZ(assembler_->available_scratch_fp_registers_);
+ assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ return enum_cast<FRegister>(reg_num);
+ }
+
+ // Free a previously unavailable FP register for use as a scratch register.
+ // This can be an arbitrary register, not necessarily the usual `FTMP`.
+ void FreeFRegister(FRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ CHECK_EQ((1u << reg_num) & assembler_->available_scratch_fp_registers_, 0u);
+ assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
+ }
+
+ // The number of available scratch FP registers.
+ size_t AvailableFRegisters() {
+ return POPCOUNT(assembler_->available_scratch_fp_registers_);
+ }
+
+ // Make sure an FP register is available for use as a scratch register.
+ void IncludeFRegister(FRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ assembler_->available_scratch_fp_registers_ |= 1u << reg_num;
+ }
+
+ // Make sure an FP register is not available for use as a scratch register.
+ void ExcludeFRegister(FRegister reg) {
+ uint32_t reg_num = enum_cast<uint32_t>(reg);
+ DCHECK_LT(reg_num, enum_cast<uint32_t>(kNumberOfFRegisters));
+ assembler_->available_scratch_fp_registers_ &= ~(1u << reg_num);
+ }
+
+ private:
+ Riscv64Assembler* const assembler_;
+ const uint32_t old_available_scratch_core_registers_;
+ const uint32_t old_available_scratch_fp_registers_;
+
+ DISALLOW_COPY_AND_ASSIGN(ScratchRegisterScope);
+};
+
+} // namespace riscv64
+} // namespace art
+
+#endif // ART_COMPILER_UTILS_RISCV64_ASSEMBLER_RISCV64_H_
diff --git a/compiler/utils/riscv64/assembler_riscv64_test.cc b/compiler/utils/riscv64/assembler_riscv64_test.cc
new file mode 100644
index 0000000000..0299ac25c5
--- /dev/null
+++ b/compiler/utils/riscv64/assembler_riscv64_test.cc
@@ -0,0 +1,2939 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "assembler_riscv64.h" + +#include <inttypes.h> + +#include <map> + +#include "base/bit_utils.h" +#include "utils/assembler_test.h" + +#define __ GetAssembler()-> + +namespace art HIDDEN { +namespace riscv64 { + +struct RISCV64CpuRegisterCompare { + bool operator()(const XRegister& a, const XRegister& b) const { return a < b; } +}; + +class AssemblerRISCV64Test : public AssemblerTest<Riscv64Assembler, + Riscv64Label, + XRegister, + FRegister, + int32_t> { + public: + using Base = AssemblerTest<Riscv64Assembler, + Riscv64Label, + XRegister, + FRegister, + int32_t>; + + AssemblerRISCV64Test() + : instruction_set_features_(Riscv64InstructionSetFeatures::FromVariant("default", nullptr)) {} + + protected: + Riscv64Assembler* CreateAssembler(ArenaAllocator* allocator) override { + return new (allocator) Riscv64Assembler(allocator, instruction_set_features_.get()); + } + + InstructionSet GetIsa() override { return InstructionSet::kRiscv64; } + + // Clang's assembler takes advantage of certain extensions for emitting constants with `li` + // but our assembler does not. For now, we use a simple `-march` to avoid the divergence. + // TODO(riscv64): Implement these more efficient patterns in assembler. + void SetUseSimpleMarch(bool value) { + use_simple_march_ = value; + } + + std::vector<std::string> GetAssemblerCommand() override { + std::vector<std::string> result = Base::GetAssemblerCommand(); + if (use_simple_march_) { + auto it = std::find_if(result.begin(), + result.end(), + [](const std::string& s) { return StartsWith(s, "-march="); }); + CHECK(it != result.end()); + *it = "-march=rv64imafd"; + } + return result; + } + + std::vector<std::string> GetDisassemblerCommand() override { + std::vector<std::string> result = Base::GetDisassemblerCommand(); + if (use_simple_march_) { + auto it = std::find_if(result.begin(), + result.end(), + [](const std::string& s) { return StartsWith(s, "--mattr="); }); + CHECK(it != result.end()); + *it = "--mattr=+F,+D,+A"; + } + return result; + } + + void SetUpHelpers() override { + if (secondary_register_names_.empty()) { + secondary_register_names_.emplace(Zero, "zero"); + secondary_register_names_.emplace(RA, "ra"); + secondary_register_names_.emplace(SP, "sp"); + secondary_register_names_.emplace(GP, "gp"); + secondary_register_names_.emplace(TP, "tp"); + secondary_register_names_.emplace(T0, "t0"); + secondary_register_names_.emplace(T1, "t1"); + secondary_register_names_.emplace(T2, "t2"); + secondary_register_names_.emplace(S0, "s0"); // s0/fp + secondary_register_names_.emplace(S1, "s1"); + secondary_register_names_.emplace(A0, "a0"); + secondary_register_names_.emplace(A1, "a1"); + secondary_register_names_.emplace(A2, "a2"); + secondary_register_names_.emplace(A3, "a3"); + secondary_register_names_.emplace(A4, "a4"); + secondary_register_names_.emplace(A5, "a5"); + secondary_register_names_.emplace(A6, "a6"); + secondary_register_names_.emplace(A7, "a7"); + secondary_register_names_.emplace(S2, "s2"); + secondary_register_names_.emplace(S3, "s3"); + secondary_register_names_.emplace(S4, "s4"); + secondary_register_names_.emplace(S5, "s5"); + secondary_register_names_.emplace(S6, "s6"); + secondary_register_names_.emplace(S7, "s7"); + secondary_register_names_.emplace(S8, "s8"); + secondary_register_names_.emplace(S9, "s9"); + secondary_register_names_.emplace(S10, "s10"); + secondary_register_names_.emplace(S11, "s11"); + secondary_register_names_.emplace(T3, "t3"); + secondary_register_names_.emplace(T4, "t4"); + 
secondary_register_names_.emplace(T5, "t5"); + secondary_register_names_.emplace(T6, "t6"); + } + } + + void TearDown() override { + AssemblerTest::TearDown(); + } + + std::vector<Riscv64Label> GetAddresses() override { + UNIMPLEMENTED(FATAL) << "Feature not implemented yet"; + UNREACHABLE(); + } + + ArrayRef<const XRegister> GetRegisters() override { + static constexpr XRegister kXRegisters[] = { + Zero, + RA, + SP, + GP, + TP, + T0, + T1, + T2, + S0, + S1, + A0, + A1, + A2, + A3, + A4, + A5, + A6, + A7, + S2, + S3, + S4, + S5, + S6, + S7, + S8, + S9, + S10, + S11, + T3, + T4, + T5, + T6, + }; + return ArrayRef<const XRegister>(kXRegisters); + } + + ArrayRef<const FRegister> GetFPRegisters() override { + static constexpr FRegister kFRegisters[] = { + FT0, + FT1, + FT2, + FT3, + FT4, + FT5, + FT6, + FT7, + FS0, + FS1, + FA0, + FA1, + FA2, + FA3, + FA4, + FA5, + FA6, + FA7, + FS2, + FS3, + FS4, + FS5, + FS6, + FS7, + FS8, + FS9, + FS10, + FS11, + FT8, + FT9, + FT10, + FT11, + }; + return ArrayRef<const FRegister>(kFRegisters); + } + + std::string GetSecondaryRegisterName(const XRegister& reg) override { + CHECK(secondary_register_names_.find(reg) != secondary_register_names_.end()); + return secondary_register_names_[reg]; + } + + int32_t CreateImmediate(int64_t imm_value) override { + return dchecked_integral_cast<int32_t>(imm_value); + } + + template <typename Emit> + std::string RepeatInsn(size_t count, const std::string& insn, Emit&& emit) { + std::string result; + for (; count != 0u; --count) { + result += insn; + emit(); + } + return result; + } + + std::string EmitNops(size_t size) { + // TODO(riscv64): Support "C" Standard Extension. + DCHECK_ALIGNED(size, sizeof(uint32_t)); + const size_t num_nops = size / sizeof(uint32_t); + return RepeatInsn(num_nops, "nop\n", [&]() { __ Nop(); }); + } + + template <typename EmitLoadConst> + void TestLoadConst64(const std::string& test_name, + bool can_use_tmp, + EmitLoadConst&& emit_load_const) { + std::string expected; + // Test standard immediates. Unlike other instructions, `Li()` accepts an `int64_t` but + // this is unsupported by `CreateImmediate()`, so we cannot use `RepeatRIb()` for these. + // Note: This `CreateImmediateValuesBits()` call does not produce any values where + // `LoadConst64()` would emit different code from `Li()`. + for (int64_t value : CreateImmediateValuesBits(64, /*as_uint=*/ false)) { + emit_load_const(A0, value); + expected += "li a0, " + std::to_string(value) + "\n"; + } + // Test various registers with a few small values. + // (Even Zero is an accepted register even if that does not really load the requested value.) + for (XRegister reg : GetRegisters()) { + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(reg); + std::string rd = GetRegisterName(reg); + emit_load_const(reg, -1); + expected += "li " + rd + ", -1\n"; + emit_load_const(reg, 0); + expected += "li " + rd + ", 0\n"; + emit_load_const(reg, 1); + expected += "li " + rd + ", 1\n"; + } + // Test some significant values. Some may just repeat the tests above but other values + // show some complex patterns, even exposing a value where clang (and therefore also this + // assembler) does not generate the shortest sequence. + // For the following values, `LoadConst64()` emits the same code as `Li()`. + int64_t test_values1[] = { + // Small values, either ADDI, ADDI+SLLI, LUI, or LUI+ADDIW. + // The ADDI+LUI is presumably used to allow shorter code for RV64C. 
+ -4097, -4096, -4095, -2176, -2049, -2048, -2047, -1025, -1024, -1023, -2, -1, + 0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049, 2176, 4095, 4096, 4097, + // Just below std::numeric_limits<int32_t>::min() + INT64_C(-0x80000001), // LUI+ADDI + INT64_C(-0x80000800), // LUI+ADDI + INT64_C(-0x80000801), // LUI+ADDIW+SLLI+ADDI; LUI+ADDI+ADDI would be shorter. + INT64_C(-0x80000800123), // LUI+ADDIW+SLLI+ADDI + INT64_C(0x0123450000000123), // LUI+SLLI+ADDI + INT64_C(-0x7654300000000123), // LUI+SLLI+ADDI + INT64_C(0x0fffffffffff0000), // LUI+SRLI + INT64_C(0x0ffffffffffff000), // LUI+SRLI + INT64_C(0x0ffffffffffff010), // LUI+ADDIW+SRLI + INT64_C(0x0fffffffffffff10), // ADDI+SLLI+ADDI; LUI+ADDIW+SRLI would be same length. + INT64_C(0x0fffffffffffff80), // ADDI+SRLI + INT64_C(0x0ffffffff7ffff80), // LUI+ADDI+SRLI + INT64_C(0x0123450000001235), // LUI+SLLI+ADDI+SLLI+ADDI + INT64_C(0x0123450000001234), // LUI+SLLI+ADDI+SLLI + INT64_C(0x0000000fff808010), // LUI+SLLI+SRLI + INT64_C(0x00000000fff80801), // LUI+SLLI+SRLI + INT64_C(0x00000000ffffffff), // ADDI+SRLI + INT64_C(0x00000001ffffffff), // ADDI+SRLI + INT64_C(0x00000003ffffffff), // ADDI+SRLI + INT64_C(0x00000000ffc00801), // LUI+ADDIW+SLLI+ADDI + INT64_C(0x00000001fffff7fe), // ADDI+SLLI+SRLI + }; + for (int64_t value : test_values1) { + emit_load_const(A0, value); + expected += "li a0, " + std::to_string(value) + "\n"; + } + // For the following values, `LoadConst64()` emits different code than `Li()`. + std::pair<int64_t, const char*> test_values2[] = { + // Li: LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI + // LoadConst: LUI+ADDIW+LUI+ADDIW+SLLI+ADD (using TMP) + { INT64_C(0x1234567812345678), + "li {reg1}, 0x12345678 / 8\n" // Trailing zero bits in high word are handled by SLLI. + "li {reg2}, 0x12345678\n" + "slli {reg1}, {reg1}, 32 + 3\n" + "add {reg1}, {reg1}, {reg2}\n" }, + { INT64_C(0x1234567887654321), + "li {reg1}, 0x12345678 + 1\n" // One higher to compensate for negative TMP. + "li {reg2}, 0x87654321 - 0x100000000\n" + "slli {reg1}, {reg1}, 32\n" + "add {reg1}, {reg1}, {reg2}\n" }, + { INT64_C(-0x1234567887654321), + "li {reg1}, -0x12345678 - 1\n" // High 32 bits of the constant. + "li {reg2}, 0x100000000 - 0x87654321\n" // Low 32 bits of the constant. 
+ "slli {reg1}, {reg1}, 32\n" + "add {reg1}, {reg1}, {reg2}\n" }, + + // Li: LUI+SLLI+ADDI+SLLI+ADDI+SLLI + // LoadConst: LUI+LUI+SLLI+ADD (using TMP) + { INT64_C(0x1234500012345000), + "lui {reg1}, 0x12345\n" + "lui {reg2}, 0x12345\n" + "slli {reg1}, {reg1}, 44 - 12\n" + "add {reg1}, {reg1}, {reg2}\n" }, + { INT64_C(0x0123450012345000), + "lui {reg1}, 0x12345\n" + "lui {reg2}, 0x12345\n" + "slli {reg1}, {reg1}, 40 - 12\n" + "add {reg1}, {reg1}, {reg2}\n" }, + + // Li: LUI+ADDIW+SLLI+ADDI+SLLI+ADDI + // LoadConst: LUI+LUI+ADDIW+SLLI+ADD (using TMP) + { INT64_C(0x0001234512345678), + "lui {reg1}, 0x12345\n" + "li {reg2}, 0x12345678\n" + "slli {reg1}, {reg1}, 32 - 12\n" + "add {reg1}, {reg1}, {reg2}\n" }, + { INT64_C(0x0012345012345678), + "lui {reg1}, 0x12345\n" + "li {reg2}, 0x12345678\n" + "slli {reg1}, {reg1}, 36 - 12\n" + "add {reg1}, {reg1}, {reg2}\n" }, + }; + for (auto [value, fmt] : test_values2) { + emit_load_const(A0, value); + if (can_use_tmp) { + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetRegisterName(A0), &base); + ReplaceReg(REG2_TOKEN, GetRegisterName(TMP), &base); + expected += base; + } else { + expected += "li a0, " + std::to_string(value) + "\n"; + } + } + + DriverStr(expected, test_name); + } + + auto GetPrintBcond() { + return [](const std::string& cond, + [[maybe_unused]] const std::string& opposite_cond, + const std::string& args, + const std::string& target) { + return "b" + cond + args + ", " + target + "\n"; + }; + } + + auto GetPrintBcondOppositeAndJ(const std::string& skip_label) { + return [=]([[maybe_unused]] const std::string& cond, + const std::string& opposite_cond, + const std::string& args, + const std::string& target) { + return "b" + opposite_cond + args + ", " + skip_label + "f\n" + + "j " + target + "\n" + + skip_label + ":\n"; + }; + } + + auto GetPrintBcondOppositeAndTail(const std::string& skip_label, const std::string& base_label) { + return [=]([[maybe_unused]] const std::string& cond, + const std::string& opposite_cond, + const std::string& args, + const std::string& target) { + return "b" + opposite_cond + args + ", " + skip_label + "f\n" + + base_label + ":\n" + + "auipc t6, %pcrel_hi(" + target + ")\n" + + "jalr x0, %pcrel_lo(" + base_label + "b)(t6)\n" + + skip_label + ":\n"; + }; + } + + // Helper function for basic tests that all branch conditions map to the correct opcodes, + // whether with branch expansion (a conditional branch with opposite condition over an + // unconditional branch) or without. 
+ template <typename PrintBcond> + std::string EmitBcondForAllConditions(Riscv64Label* label, + const std::string& target, + PrintBcond&& print_bcond) { + XRegister rs = A0; + __ Beqz(rs, label); + __ Bnez(rs, label); + __ Blez(rs, label); + __ Bgez(rs, label); + __ Bltz(rs, label); + __ Bgtz(rs, label); + XRegister rt = A1; + __ Beq(rs, rt, label); + __ Bne(rs, rt, label); + __ Ble(rs, rt, label); + __ Bge(rs, rt, label); + __ Blt(rs, rt, label); + __ Bgt(rs, rt, label); + __ Bleu(rs, rt, label); + __ Bgeu(rs, rt, label); + __ Bltu(rs, rt, label); + __ Bgtu(rs, rt, label); + + return + print_bcond("eq", "ne", "z a0", target) + + print_bcond("ne", "eq", "z a0", target) + + print_bcond("le", "gt", "z a0", target) + + print_bcond("ge", "lt", "z a0", target) + + print_bcond("lt", "ge", "z a0", target) + + print_bcond("gt", "le", "z a0", target) + + print_bcond("eq", "ne", " a0, a1", target) + + print_bcond("ne", "eq", " a0, a1", target) + + print_bcond("le", "gt", " a0, a1", target) + + print_bcond("ge", "lt", " a0, a1", target) + + print_bcond("lt", "ge", " a0, a1", target) + + print_bcond("gt", "le", " a0, a1", target) + + print_bcond("leu", "gtu", " a0, a1", target) + + print_bcond("geu", "ltu", " a0, a1", target) + + print_bcond("ltu", "geu", " a0, a1", target) + + print_bcond("gtu", "leu", " a0, a1", target); + } + + // Test Bcond for forward branches with all conditions. + // The gap must be such that either all branches expand, or none does. + template <typename PrintBcond> + void TestBcondForward(const std::string& test_name, + size_t gap_size, + const std::string& target_label, + PrintBcond&& print_bcond) { + std::string expected; + Riscv64Label label; + expected += EmitBcondForAllConditions(&label, target_label + "f", print_bcond); + expected += EmitNops(gap_size); + __ Bind(&label); + expected += target_label + ":\n"; + DriverStr(expected, test_name); + } + + // Test Bcond for backward branches with all conditions. + // The gap must be such that either all branches expand, or none does. + template <typename PrintBcond> + void TestBcondBackward(const std::string& test_name, + size_t gap_size, + const std::string& target_label, + PrintBcond&& print_bcond) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += target_label + ":\n"; + expected += EmitNops(gap_size); + expected += EmitBcondForAllConditions(&label, target_label + "b", print_bcond); + DriverStr(expected, test_name); + } + + size_t MaxOffset13BackwardDistance() { + return 4 * KB; + } + + size_t MaxOffset13ForwardDistance() { + // TODO(riscv64): Support "C" Standard Extension, max forward distance 4KiB - 2. + return 4 * KB - 4; + } + + size_t MaxOffset21BackwardDistance() { + return 1 * MB; + } + + size_t MaxOffset21ForwardDistance() { + // TODO(riscv64): Support "C" Standard Extension, max forward distance 1MiB - 2. 
+ return 1 * MB - 4; + } + + template <typename PrintBcond> + void TestBeqA0A1Forward(const std::string& test_name, + size_t nops_size, + const std::string& target_label, + PrintBcond&& print_bcond) { + std::string expected; + Riscv64Label label; + __ Beq(A0, A1, &label); + expected += print_bcond("eq", "ne", " a0, a1", target_label + "f"); + expected += EmitNops(nops_size); + __ Bind(&label); + expected += target_label + ":\n"; + DriverStr(expected, test_name); + } + + template <typename PrintBcond> + void TestBeqA0A1Backward(const std::string& test_name, + size_t nops_size, + const std::string& target_label, + PrintBcond&& print_bcond) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += target_label + ":\n"; + expected += EmitNops(nops_size); + __ Beq(A0, A1, &label); + expected += print_bcond("eq", "ne", " a0, a1", target_label + "b"); + DriverStr(expected, test_name); + } + + // Test a branch setup where expanding one branch causes expanding another branch + // which causes expanding another branch, etc. The argument `cascade` determines + // whether we push the first branch to expand, or not. + template <typename PrintBcond> + void TestBeqA0A1MaybeCascade(const std::string& test_name, + bool cascade, + PrintBcond&& print_bcond) { + const size_t kNumBeqs = MaxOffset13ForwardDistance() / sizeof(uint32_t) / 2u; + auto label_name = [](size_t i) { return ".L" + std::to_string(i); }; + + std::string expected; + std::vector<Riscv64Label> labels(kNumBeqs); + for (size_t i = 0; i != kNumBeqs; ++i) { + __ Beq(A0, A1, &labels[i]); + expected += print_bcond("eq", "ne", " a0, a1", label_name(i)); + } + if (cascade) { + expected += EmitNops(sizeof(uint32_t)); + } + for (size_t i = 0; i != kNumBeqs; ++i) { + expected += EmitNops(2 * sizeof(uint32_t)); + __ Bind(&labels[i]); + expected += label_name(i) + ":\n"; + } + DriverStr(expected, test_name); + } + + auto GetPrintJalRd() { + return [=](XRegister rd, const std::string& target) { + std::string rd_name = GetRegisterName(rd); + return "jal " + rd_name + ", " + target + "\n"; + }; + } + + auto GetPrintCallRd(const std::string& base_label) { + return [=](XRegister rd, const std::string& target) { + std::string rd_name = GetRegisterName(rd); + std::string temp_name = (rd != Zero) ? 
rd_name : GetRegisterName(TMP); + return base_label + ":\n" + + "auipc " + temp_name + ", %pcrel_hi(" + target + ")\n" + + "jalr " + rd_name + ", %pcrel_lo(" + base_label + "b)(" + temp_name + ")\n"; + }; + } + + template <typename PrintJalRd> + void TestJalRdForward(const std::string& test_name, + size_t gap_size, + const std::string& label_name, + PrintJalRd&& print_jalrd) { + std::string expected; + Riscv64Label label; + for (XRegister reg : GetRegisters()) { + __ Jal(reg, &label); + expected += print_jalrd(reg, label_name + "f"); + } + expected += EmitNops(gap_size); + __ Bind(&label); + expected += label_name + ":\n"; + DriverStr(expected, test_name); + } + + template <typename PrintJalRd> + void TestJalRdBackward(const std::string& test_name, + size_t gap_size, + const std::string& label_name, + PrintJalRd&& print_jalrd) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += label_name + ":\n"; + expected += EmitNops(gap_size); + for (XRegister reg : GetRegisters()) { + __ Jal(reg, &label); + expected += print_jalrd(reg, label_name + "b"); + } + DriverStr(expected, test_name); + } + + auto GetEmitJ() { + return [=](Riscv64Label* label) { __ J(label); }; + } + + auto GetEmitJal() { + return [=](Riscv64Label* label) { __ Jal(label); }; + } + + auto GetPrintJ() { + return [=](const std::string& target) { + return "j " + target + "\n"; + }; + } + + auto GetPrintJal() { + return [=](const std::string& target) { + return "jal " + target + "\n"; + }; + } + + auto GetPrintTail(const std::string& base_label) { + return [=](const std::string& target) { + return base_label + ":\n" + + "auipc t6, %pcrel_hi(" + target + ")\n" + + "jalr x0, %pcrel_lo(" + base_label + "b)(t6)\n"; + }; + } + + auto GetPrintCall(const std::string& base_label) { + return [=](const std::string& target) { + return base_label + ":\n" + + "auipc ra, %pcrel_hi(" + target + ")\n" + + "jalr ra, %pcrel_lo(" + base_label + "b)(ra)\n"; + }; + } + + template <typename EmitBuncond, typename PrintBuncond> + void TestBuncondForward(const std::string& test_name, + size_t gap_size, + const std::string& label_name, + EmitBuncond&& emit_buncond, + PrintBuncond&& print_buncond) { + std::string expected; + Riscv64Label label; + emit_buncond(&label); + expected += print_buncond(label_name + "f"); + expected += EmitNops(gap_size); + __ Bind(&label); + expected += label_name + ":\n"; + DriverStr(expected, test_name); + } + + template <typename EmitBuncond, typename PrintBuncond> + void TestBuncondBackward(const std::string& test_name, + size_t gap_size, + const std::string& label_name, + EmitBuncond&& emit_buncond, + PrintBuncond&& print_buncond) { + std::string expected; + Riscv64Label label; + __ Bind(&label); + expected += label_name + ":\n"; + expected += EmitNops(gap_size); + emit_buncond(&label); + expected += print_buncond(label_name + "b"); + DriverStr(expected, test_name); + } + + template <typename EmitOp> + void TestAddConst(const std::string& test_name, + size_t bits, + const std::string& suffix, + EmitOp&& emit_op) { + int64_t kImm12s[] = { + 0, 1, 2, 0xff, 0x100, 0x1ff, 0x200, 0x3ff, 0x400, 0x7ff, + -1, -2, -0x100, -0x101, -0x200, -0x201, -0x400, -0x401, -0x800, + }; + int64_t kSimplePositiveValues[] = { + 0x800, 0x801, 0xbff, 0xc00, 0xff0, 0xff7, 0xff8, 0xffb, 0xffc, 0xffd, 0xffe, + }; + int64_t kSimpleNegativeValues[] = { + -0x801, -0x802, -0xbff, -0xc00, -0xff0, -0xff8, -0xffc, -0xffe, -0xfff, -0x1000, + }; + std::vector<int64_t> large_values = CreateImmediateValuesBits(bits, /*as_uint=*/ false); + 
auto kept_end = std::remove_if(large_values.begin(), + large_values.end(), + [](int64_t value) { return IsInt<13>(value); }); + large_values.erase(kept_end, large_values.end()); + large_values.push_back(0xfff); + + std::string expected; + for (XRegister rd : GetRegisters()) { + std::string rd_name = GetRegisterName(rd); + std::string addi_rd = ART_FORMAT("addi{} {}, ", suffix, rd_name); + std::string add_rd = ART_FORMAT("add{} {}, ", suffix, rd_name); + for (XRegister rs1 : GetRegisters()) { + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(rs1); + srs.ExcludeXRegister(rd); + + std::string rs1_name = GetRegisterName(rs1); + std::string tmp_name = GetRegisterName((rs1 != TMP) ? TMP : TMP2); + std::string addi_tmp = ART_FORMAT("addi{} {}, ", suffix, tmp_name); + + for (int64_t imm : kImm12s) { + emit_op(rd, rs1, imm); + expected += ART_FORMAT("{}{}, {}\n", addi_rd, rs1_name, std::to_string(imm)); + } + + auto emit_simple_ops = [&](ArrayRef<const int64_t> imms, int64_t adjustment) { + for (int64_t imm : imms) { + emit_op(rd, rs1, imm); + expected += ART_FORMAT("{}{}, {}\n", addi_tmp, rs1_name, std::to_string(adjustment)); + expected += + ART_FORMAT("{}{}, {}\n", addi_rd, tmp_name, std::to_string(imm - adjustment)); + } + }; + emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveValues), 0x7ff); + emit_simple_ops(ArrayRef<const int64_t>(kSimpleNegativeValues), -0x800); + + for (int64_t imm : large_values) { + emit_op(rd, rs1, imm); + expected += ART_FORMAT("li {}, {}\n", tmp_name, std::to_string(imm)); + expected += ART_FORMAT("{}{}, {}\n", add_rd, rs1_name, tmp_name); + } + } + } + DriverStr(expected, test_name); + } + + template <typename GetTemp, typename EmitOp> + std::string RepeatLoadStoreArbitraryOffset(const std::string& head, + GetTemp&& get_temp, + EmitOp&& emit_op) { + int64_t kImm12s[] = { + 0, 1, 2, 0xff, 0x100, 0x1ff, 0x200, 0x3ff, 0x400, 0x7ff, + -1, -2, -0x100, -0x101, -0x200, -0x201, -0x400, -0x401, -0x800, + }; + int64_t kSimplePositiveOffsetsAlign8[] = { + 0x800, 0x801, 0xbff, 0xc00, 0xff0, 0xff4, 0xff6, 0xff7 + }; + int64_t kSimplePositiveOffsetsAlign4[] = { + 0xff8, 0xff9, 0xffa, 0xffb + }; + int64_t kSimplePositiveOffsetsAlign2[] = { + 0xffc, 0xffd + }; + int64_t kSimplePositiveOffsetsNoAlign[] = { + 0xffe + }; + int64_t kSimpleNegativeOffsets[] = { + -0x801, -0x802, -0xbff, -0xc00, -0xff0, -0xff8, -0xffc, -0xffe, -0xfff, -0x1000, + }; + int64_t kSplitOffsets[] = { + 0xfff, 0x1000, 0x1001, 0x17ff, 0x1800, 0x1fff, 0x2000, 0x2001, 0x27ff, 0x2800, + 0x7fffe7ff, 0x7fffe800, 0x7fffefff, 0x7ffff000, 0x7ffff001, 0x7ffff7ff, + -0x1001, -0x1002, -0x17ff, -0x1800, -0x1801, -0x2000, -0x2001, -0x2800, -0x2801, + -0x7ffff000, -0x7ffff001, -0x7ffff800, -0x7ffff801, -0x7fffffff, -0x80000000, + }; + int64_t kSpecialOffsets[] = { + 0x7ffff800, 0x7ffff801, 0x7ffffffe, 0x7fffffff + }; + + std::string expected; + for (XRegister rs1 : GetRegisters()) { + XRegister tmp = get_temp(rs1); + if (tmp == kNoXRegister) { + continue; // Unsupported register combination. 
+ } + std::string tmp_name = GetRegisterName(tmp); + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(rs1); + std::string rs1_name = GetRegisterName(rs1); + + for (int64_t imm : kImm12s) { + emit_op(rs1, imm); + expected += ART_FORMAT("{}, {}({})\n", head, std::to_string(imm), rs1_name); + } + + auto emit_simple_ops = [&](ArrayRef<const int64_t> imms, int64_t adjustment) { + for (int64_t imm : imms) { + emit_op(rs1, imm); + expected += + ART_FORMAT("addi {}, {}, {}\n", tmp_name, rs1_name, std::to_string(adjustment)); + expected += ART_FORMAT("{}, {}({})\n", head, std::to_string(imm - adjustment), tmp_name); + } + }; + emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsAlign8), 0x7f8); + emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsAlign4), 0x7fc); + emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsAlign2), 0x7fe); + emit_simple_ops(ArrayRef<const int64_t>(kSimplePositiveOffsetsNoAlign), 0x7ff); + emit_simple_ops(ArrayRef<const int64_t>(kSimpleNegativeOffsets), -0x800); + + for (int64_t imm : kSplitOffsets) { + emit_op(rs1, imm); + uint32_t imm20 = ((imm >> 12) + ((imm >> 11) & 1)) & 0xfffff; + int32_t small_offset = (imm & 0xfff) - ((imm & 0x800) << 1); + expected += ART_FORMAT("lui {}, {}\n", tmp_name, std::to_string(imm20)); + expected += ART_FORMAT("add {}, {}, {}\n", tmp_name, tmp_name, rs1_name); + expected += ART_FORMAT("{},{}({})\n", head, std::to_string(small_offset), tmp_name); + } + + for (int64_t imm : kSpecialOffsets) { + emit_op(rs1, imm); + expected += ART_FORMAT("lui {}, 0x80000\n", tmp_name); + expected += + ART_FORMAT("addiw {}, {}, {}\n", tmp_name, tmp_name, std::to_string(imm - 0x80000000)); + expected += ART_FORMAT("add {}, {}, {}\n", tmp_name, tmp_name, rs1_name); + expected += ART_FORMAT("{}, ({})\n", head, tmp_name); + } + } + return expected; + } + + void TestLoadStoreArbitraryOffset(const std::string& test_name, + const std::string& insn, + void (Riscv64Assembler::*fn)(XRegister, XRegister, int32_t), + bool is_store) { + std::string expected; + for (XRegister rd : GetRegisters()) { + ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(rd); + auto get_temp = [&](XRegister rs1) { + if (is_store) { + return (rs1 != TMP && rd != TMP) + ? TMP + : (rs1 != TMP2 && rd != TMP2) ? TMP2 : kNoXRegister; + } else { + return rs1 != TMP ? TMP : TMP2; + } + }; + expected += RepeatLoadStoreArbitraryOffset( + insn + " " + GetRegisterName(rd), + get_temp, + [&](XRegister rs1, int64_t offset) { (GetAssembler()->*fn)(rd, rs1, offset); }); + } + DriverStr(expected, test_name); + } + + void TestFPLoadStoreArbitraryOffset(const std::string& test_name, + const std::string& insn, + void (Riscv64Assembler::*fn)(FRegister, XRegister, int32_t)) { + std::string expected; + for (FRegister rd : GetFPRegisters()) { + expected += RepeatLoadStoreArbitraryOffset( + insn + " " + GetFPRegName(rd), + [&](XRegister rs1) { return rs1 != TMP ? 
TMP : TMP2; }, + [&](XRegister rs1, int64_t offset) { (GetAssembler()->*fn)(rd, rs1, offset); }); + } + DriverStr(expected, test_name); + } + + void TestLoadLiteral(const std::string& test_name, bool with_padding_for_long) { + std::string expected; + Literal* narrow_literal = __ NewLiteral<uint32_t>(0x12345678); + Literal* wide_literal = __ NewLiteral<uint64_t>(0x1234567887654321); + auto print_load = [&](const std::string& load, XRegister rd, const std::string& label) { + std::string rd_name = GetRegisterName(rd); + expected += "1:\n" + "auipc " + rd_name + ", %pcrel_hi(" + label + "f)\n" + + load + " " + rd_name + ", %pcrel_lo(1b)(" + rd_name + ")\n"; + }; + for (XRegister reg : GetRegisters()) { + if (reg != Zero) { + __ Loadw(reg, narrow_literal); + print_load("lw", reg, "2"); + __ Loadwu(reg, narrow_literal); + print_load("lwu", reg, "2"); + __ Loadd(reg, wide_literal); + print_load("ld", reg, "3"); + } + } + std::string tmp = GetRegisterName(TMP); + auto print_fp_load = [&](const std::string& load, FRegister rd, const std::string& label) { + std::string rd_name = GetFPRegName(rd); + expected += "1:\n" + "auipc " + tmp + ", %pcrel_hi(" + label + "f)\n" + + load + " " + rd_name + ", %pcrel_lo(1b)(" + tmp + ")\n"; + }; + for (FRegister freg : GetFPRegisters()) { + __ FLoadw(freg, narrow_literal); + print_fp_load("flw", freg, "2"); + __ FLoadd(freg, wide_literal); + print_fp_load("fld", freg, "3"); + } + // All literal loads above emit 8 bytes of code. The narrow literal shall emit 4 bytes of code. + // If we do not add another instruction, we shall end up with padding before the long literal. + expected += EmitNops(with_padding_for_long ? 0u : sizeof(uint32_t)); + expected += "2:\n" + ".4byte 0x12345678\n" + + std::string(with_padding_for_long ? 
".4byte 0\n" : "") + + "3:\n" + ".8byte 0x1234567887654321\n"; + DriverStr(expected, test_name); + } + + std::string RepeatFFFFRoundingMode( + void (Riscv64Assembler::*f)(FRegister, FRegister, FRegister, FRegister, FPRoundingMode), + const std::string& fmt) { + CHECK(f != nullptr); + std::string str; + for (FRegister reg1 : GetFPRegisters()) { + for (FRegister reg2 : GetFPRegisters()) { + for (FRegister reg3 : GetFPRegisters()) { + for (FRegister reg4 : GetFPRegisters()) { + for (FPRoundingMode rm : kRoundingModes) { + (GetAssembler()->*f)(reg1, reg2, reg3, reg4, rm); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetFPRegName(reg1), &base); + ReplaceReg(REG2_TOKEN, GetFPRegName(reg2), &base); + ReplaceReg(REG3_TOKEN, GetFPRegName(reg3), &base); + ReplaceReg(REG4_TOKEN, GetFPRegName(reg4), &base); + ReplaceRoundingMode(rm, &base); + str += base; + str += "\n"; + } + } + } + } + } + return str; + } + + std::string RepeatFFFRoundingMode( + void (Riscv64Assembler::*f)(FRegister, FRegister, FRegister, FPRoundingMode), + const std::string& fmt) { + CHECK(f != nullptr); + std::string str; + for (FRegister reg1 : GetFPRegisters()) { + for (FRegister reg2 : GetFPRegisters()) { + for (FRegister reg3 : GetFPRegisters()) { + for (FPRoundingMode rm : kRoundingModes) { + (GetAssembler()->*f)(reg1, reg2, reg3, rm); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetFPRegName(reg1), &base); + ReplaceReg(REG2_TOKEN, GetFPRegName(reg2), &base); + ReplaceReg(REG3_TOKEN, GetFPRegName(reg3), &base); + ReplaceRoundingMode(rm, &base); + str += base; + str += "\n"; + } + } + } + } + return str; + } + + template <typename Reg1, typename Reg2> + std::string RepeatTemplatedRegistersRoundingMode( + void (Riscv64Assembler::*f)(Reg1, Reg2, FPRoundingMode), + ArrayRef<const Reg1> reg1_registers, + ArrayRef<const Reg2> reg2_registers, + std::string (Base::*GetName1)(const Reg1&), + std::string (Base::*GetName2)(const Reg2&), + const std::string& fmt) { + CHECK(f != nullptr); + std::string str; + for (Reg1 reg1 : reg1_registers) { + for (Reg2 reg2 : reg2_registers) { + for (FPRoundingMode rm : kRoundingModes) { + (GetAssembler()->*f)(reg1, reg2, rm); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, (this->*GetName1)(reg1), &base); + ReplaceReg(REG2_TOKEN, (this->*GetName2)(reg2), &base); + ReplaceRoundingMode(rm, &base); + str += base; + str += "\n"; + } + } + } + return str; + } + + std::string RepeatFFRoundingMode( + void (Riscv64Assembler::*f)(FRegister, FRegister, FPRoundingMode), + const std::string& fmt) { + return RepeatTemplatedRegistersRoundingMode(f, + GetFPRegisters(), + GetFPRegisters(), + &AssemblerRISCV64Test::GetFPRegName, + &AssemblerRISCV64Test::GetFPRegName, + fmt); + } + + std::string RepeatrFRoundingMode( + void (Riscv64Assembler::*f)(XRegister, FRegister, FPRoundingMode), + const std::string& fmt) { + return RepeatTemplatedRegistersRoundingMode(f, + GetRegisters(), + GetFPRegisters(), + &Base::GetSecondaryRegisterName, + &AssemblerRISCV64Test::GetFPRegName, + fmt); + } + + std::string RepeatFrRoundingMode( + void (Riscv64Assembler::*f)(FRegister, XRegister, FPRoundingMode), + const std::string& fmt) { + return RepeatTemplatedRegistersRoundingMode(f, + GetFPRegisters(), + GetRegisters(), + &AssemblerRISCV64Test::GetFPRegName, + &Base::GetSecondaryRegisterName, + fmt); + } + + template <typename InvalidAqRl> + std::string RepeatRRAqRl(void (Riscv64Assembler::*f)(XRegister, XRegister, AqRl), + const std::string& fmt, + InvalidAqRl&& invalid_aqrl) { + CHECK(f != nullptr); + std::string 
str; + for (XRegister reg1 : GetRegisters()) { + for (XRegister reg2 : GetRegisters()) { + for (AqRl aqrl : kAqRls) { + if (invalid_aqrl(aqrl)) { + continue; + } + (GetAssembler()->*f)(reg1, reg2, aqrl); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetRegisterName(reg1), &base); + ReplaceReg(REG2_TOKEN, GetRegisterName(reg2), &base); + ReplaceAqRl(aqrl, &base); + str += base; + str += "\n"; + } + } + } + return str; + } + + template <typename InvalidAqRl> + std::string RepeatRRRAqRl(void (Riscv64Assembler::*f)(XRegister, XRegister, XRegister, AqRl), + const std::string& fmt, + InvalidAqRl&& invalid_aqrl) { + CHECK(f != nullptr); + std::string str; + for (XRegister reg1 : GetRegisters()) { + for (XRegister reg2 : GetRegisters()) { + for (XRegister reg3 : GetRegisters()) { + for (AqRl aqrl : kAqRls) { + if (invalid_aqrl(aqrl)) { + continue; + } + (GetAssembler()->*f)(reg1, reg2, reg3, aqrl); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetRegisterName(reg1), &base); + ReplaceReg(REG2_TOKEN, GetRegisterName(reg2), &base); + ReplaceReg(REG3_TOKEN, GetRegisterName(reg3), &base); + ReplaceAqRl(aqrl, &base); + str += base; + str += "\n"; + } + } + } + } + return str; + } + + std::string RepeatRRRAqRl(void (Riscv64Assembler::*f)(XRegister, XRegister, XRegister, AqRl), + const std::string& fmt) { + return RepeatRRRAqRl(f, fmt, [](AqRl) { return false; }); + } + + std::string RepeatCsrrX(void (Riscv64Assembler::*f)(XRegister, uint32_t, XRegister), + const std::string& fmt) { + CHECK(f != nullptr); + std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true); + std::string str; + for (XRegister reg1 : GetRegisters()) { + for (int64_t csr : csrs) { + for (XRegister reg2 : GetRegisters()) { + (GetAssembler()->*f)(reg1, dchecked_integral_cast<uint32_t>(csr), reg2); + + std::string base = fmt; + ReplaceReg(REG1_TOKEN, GetRegisterName(reg1), &base); + ReplaceCsrrImm(CSR_TOKEN, csr, &base); + ReplaceReg(REG2_TOKEN, GetRegisterName(reg2), &base); + str += base; + str += "\n"; + } + } + } + return str; + } + + std::string RepeatCsrrXi(void (Riscv64Assembler::*f)(XRegister, uint32_t, uint32_t), + const std::string& fmt) { + CHECK(f != nullptr); + std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true); + std::vector<int64_t> uimms = CreateImmediateValuesBits(2, /*as_uint=*/ true); + std::string str; + for (XRegister reg : GetRegisters()) { + for (int64_t csr : csrs) { + for (int64_t uimm : uimms) { + (GetAssembler()->*f)( + reg, dchecked_integral_cast<uint32_t>(csr), dchecked_integral_cast<uint32_t>(uimm)); + + std::string base = fmt; + ReplaceReg(REG_TOKEN, GetRegisterName(reg), &base); + ReplaceCsrrImm(CSR_TOKEN, csr, &base); + ReplaceCsrrImm(UIMM_TOKEN, uimm, &base); + str += base; + str += "\n"; + } + } + } + return str; + } + + template <typename EmitCssrX> + void TestCsrrXMacro(const std::string& test_name, + const std::string& fmt, + EmitCssrX&& emit_csrrx) { + std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true); + std::string expected; + for (XRegister reg : GetRegisters()) { + for (int64_t csr : csrs) { + emit_csrrx(dchecked_integral_cast<uint32_t>(csr), reg); + + std::string base = fmt; + ReplaceReg(REG_TOKEN, GetRegisterName(reg), &base); + ReplaceCsrrImm(CSR_TOKEN, csr, &base); + expected += base; + expected += "\n"; + } + } + DriverStr(expected, test_name); + } + + template <typename EmitCssrXi> + void TestCsrrXiMacro(const std::string& test_name, + const std::string& fmt, + EmitCssrXi&& emit_csrrxi) { + 
std::vector<int64_t> csrs = CreateImmediateValuesBits(12, /*as_uint=*/ true); + std::vector<int64_t> uimms = CreateImmediateValuesBits(2, /*as_uint=*/ true); + std::string expected; + for (int64_t csr : csrs) { + for (int64_t uimm : uimms) { + emit_csrrxi(dchecked_integral_cast<uint32_t>(csr), dchecked_integral_cast<uint32_t>(uimm)); + + std::string base = fmt; + ReplaceCsrrImm(CSR_TOKEN, csr, &base); + ReplaceCsrrImm(UIMM_TOKEN, uimm, &base); + expected += base; + expected += "\n"; + } + } + DriverStr(expected, test_name); + } + + private: + static constexpr const char* RM_TOKEN = "{rm}"; + static constexpr const char* AQRL_TOKEN = "{aqrl}"; + static constexpr const char* CSR_TOKEN = "{csr}"; + static constexpr const char* UIMM_TOKEN = "{uimm}"; + + static constexpr AqRl kAqRls[] = { AqRl::kNone, AqRl::kRelease, AqRl::kAcquire, AqRl::kAqRl }; + + static constexpr FPRoundingMode kRoundingModes[] = { + FPRoundingMode::kRNE, + FPRoundingMode::kRTZ, + FPRoundingMode::kRDN, + FPRoundingMode::kRUP, + FPRoundingMode::kRMM, + FPRoundingMode::kDYN + }; + + void ReplaceRoundingMode(FPRoundingMode rm, /*inout*/ std::string* str) { + const char* replacement; + switch (rm) { + case FPRoundingMode::kRNE: + replacement = "rne"; + break; + case FPRoundingMode::kRTZ: + replacement = "rtz"; + break; + case FPRoundingMode::kRDN: + replacement = "rdn"; + break; + case FPRoundingMode::kRUP: + replacement = "rup"; + break; + case FPRoundingMode::kRMM: + replacement = "rmm"; + break; + case FPRoundingMode::kDYN: + replacement = "dyn"; + break; + default: + LOG(FATAL) << "Unexpected value for rm: " << enum_cast<uint32_t>(rm); + UNREACHABLE(); + } + size_t rm_index = str->find(RM_TOKEN); + EXPECT_NE(rm_index, std::string::npos); + if (rm_index != std::string::npos) { + str->replace(rm_index, ConstexprStrLen(RM_TOKEN), replacement); + } + } + + void ReplaceAqRl(AqRl aqrl, /*inout*/ std::string* str) { + const char* replacement; + switch (aqrl) { + case AqRl::kNone: + replacement = ""; + break; + case AqRl::kRelease: + replacement = ".rl"; + break; + case AqRl::kAcquire: + replacement = ".aq"; + break; + case AqRl::kAqRl: + replacement = ".aqrl"; + break; + default: + LOG(FATAL) << "Unexpected value for `aqrl`: " << enum_cast<uint32_t>(aqrl); + UNREACHABLE(); + } + size_t aqrl_index = str->find(AQRL_TOKEN); + EXPECT_NE(aqrl_index, std::string::npos); + if (aqrl_index != std::string::npos) { + str->replace(aqrl_index, ConstexprStrLen(AQRL_TOKEN), replacement); + } + } + + static void ReplaceCsrrImm(const std::string& imm_token, + int64_t imm, + /*inout*/ std::string* str) { + size_t imm_index = str->find(imm_token); + EXPECT_NE(imm_index, std::string::npos); + if (imm_index != std::string::npos) { + str->replace(imm_index, imm_token.length(), std::to_string(imm)); + } + } + + std::map<XRegister, std::string, RISCV64CpuRegisterCompare> secondary_register_names_; + + std::unique_ptr<const Riscv64InstructionSetFeatures> instruction_set_features_; + bool use_simple_march_ = false; +}; + +TEST_F(AssemblerRISCV64Test, Toolchain) { EXPECT_TRUE(CheckTools()); } + +TEST_F(AssemblerRISCV64Test, Lui) { + DriverStr(RepeatRIb(&Riscv64Assembler::Lui, 20, "lui {reg}, {imm}"), "Lui"); +} + +TEST_F(AssemblerRISCV64Test, Auipc) { + DriverStr(RepeatRIb(&Riscv64Assembler::Auipc, 20, "auipc {reg}, {imm}"), "Auipc"); +} + +TEST_F(AssemblerRISCV64Test, Jal) { + // TODO(riscv64): Change "-19, 2" to "-20, 1" for "C" Standard Extension. 
+ DriverStr(RepeatRIbS(&Riscv64Assembler::Jal, -19, 2, "jal {reg}, {imm}\n"), "Jal"); +} + +TEST_F(AssemblerRISCV64Test, Jalr) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIb(&Riscv64Assembler::Jalr, -12, "jalr {reg1}, {reg2}, {imm}\n"), "Jalr"); +} + +TEST_F(AssemblerRISCV64Test, Beq) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Beq, -11, 2, "beq {reg1}, {reg2}, {imm}\n"), "Beq"); +} + +TEST_F(AssemblerRISCV64Test, Bne) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bne, -11, 2, "bne {reg1}, {reg2}, {imm}\n"), "Bne"); +} + +TEST_F(AssemblerRISCV64Test, Blt) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Blt, -11, 2, "blt {reg1}, {reg2}, {imm}\n"), "Blt"); +} + +TEST_F(AssemblerRISCV64Test, Bge) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bge, -11, 2, "bge {reg1}, {reg2}, {imm}\n"), "Bge"); +} + +TEST_F(AssemblerRISCV64Test, Bltu) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bltu, -11, 2, "bltu {reg1}, {reg2}, {imm}\n"), "Bltu"); +} + +TEST_F(AssemblerRISCV64Test, Bgeu) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bgeu, -11, 2, "bgeu {reg1}, {reg2}, {imm}\n"), "Bgeu"); +} + +TEST_F(AssemblerRISCV64Test, Lb) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lb, -12, "lb {reg1}, {imm}({reg2})"), "Lb"); +} + +TEST_F(AssemblerRISCV64Test, Lh) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lh, -12, "lh {reg1}, {imm}({reg2})"), "Lh"); +} + +TEST_F(AssemblerRISCV64Test, Lw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lw, -12, "lw {reg1}, {imm}({reg2})"), "Lw"); +} + +TEST_F(AssemblerRISCV64Test, Ld) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Ld, -12, "ld {reg1}, {imm}({reg2})"), "Ld"); +} + +TEST_F(AssemblerRISCV64Test, Lbu) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lbu, -12, "lbu {reg1}, {imm}({reg2})"), "Lbu"); +} + +TEST_F(AssemblerRISCV64Test, Lhu) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lhu, -12, "lhu {reg1}, {imm}({reg2})"), "Lhu"); +} + +TEST_F(AssemblerRISCV64Test, Lwu) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Lwu, -12, "lwu {reg1}, {imm}({reg2})"), "Lwu"); +} + +TEST_F(AssemblerRISCV64Test, Sb) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sb, -12, "sb {reg1}, {imm}({reg2})"), "Sb"); +} + +TEST_F(AssemblerRISCV64Test, Sh) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sh, -12, "sh {reg1}, {imm}({reg2})"), "Sh"); +} + +TEST_F(AssemblerRISCV64Test, Sw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sw, -12, "sw {reg1}, {imm}({reg2})"), "Sw"); +} + +TEST_F(AssemblerRISCV64Test, Sd) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sd, -12, "sd {reg1}, {imm}({reg2})"), "Sd"); +} + +TEST_F(AssemblerRISCV64Test, Addi) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Addi, -12, "addi {reg1}, {reg2}, {imm}"), "Addi"); +} + +TEST_F(AssemblerRISCV64Test, Slti) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Slti, -12, "slti {reg1}, {reg2}, {imm}"), "Slti"); +} + +TEST_F(AssemblerRISCV64Test, Sltiu) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sltiu, -12, "sltiu {reg1}, {reg2}, {imm}"), "Sltiu"); +} + +TEST_F(AssemblerRISCV64Test, Xori) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Xori, 11, "xori {reg1}, {reg2}, {imm}"), 
"Xori"); +} + +TEST_F(AssemblerRISCV64Test, Ori) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Ori, -12, "ori {reg1}, {reg2}, {imm}"), "Ori"); +} + +TEST_F(AssemblerRISCV64Test, Andi) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Andi, -12, "andi {reg1}, {reg2}, {imm}"), "Andi"); +} + +TEST_F(AssemblerRISCV64Test, Slli) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Slli, 6, "slli {reg1}, {reg2}, {imm}"), "Slli"); +} + +TEST_F(AssemblerRISCV64Test, Srli) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Srli, 6, "srli {reg1}, {reg2}, {imm}"), "Srli"); +} + +TEST_F(AssemblerRISCV64Test, Srai) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Srai, 6, "srai {reg1}, {reg2}, {imm}"), "Srai"); +} + +TEST_F(AssemblerRISCV64Test, Add) { + DriverStr(RepeatRRR(&Riscv64Assembler::Add, "add {reg1}, {reg2}, {reg3}"), "Add"); +} + +TEST_F(AssemblerRISCV64Test, Sub) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sub, "sub {reg1}, {reg2}, {reg3}"), "Sub"); +} + +TEST_F(AssemblerRISCV64Test, Slt) { + DriverStr(RepeatRRR(&Riscv64Assembler::Slt, "slt {reg1}, {reg2}, {reg3}"), "Slt"); +} + +TEST_F(AssemblerRISCV64Test, Sltu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sltu, "sltu {reg1}, {reg2}, {reg3}"), "Sltu"); +} + +TEST_F(AssemblerRISCV64Test, Xor) { + DriverStr(RepeatRRR(&Riscv64Assembler::Xor, "xor {reg1}, {reg2}, {reg3}"), "Xor"); +} + +TEST_F(AssemblerRISCV64Test, Or) { + DriverStr(RepeatRRR(&Riscv64Assembler::Or, "or {reg1}, {reg2}, {reg3}"), "Or"); +} + +TEST_F(AssemblerRISCV64Test, And) { + DriverStr(RepeatRRR(&Riscv64Assembler::And, "and {reg1}, {reg2}, {reg3}"), "And"); +} + +TEST_F(AssemblerRISCV64Test, Sll) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sll, "sll {reg1}, {reg2}, {reg3}"), "Sll"); +} + +TEST_F(AssemblerRISCV64Test, Srl) { + DriverStr(RepeatRRR(&Riscv64Assembler::Srl, "srl {reg1}, {reg2}, {reg3}"), "Srl"); +} + +TEST_F(AssemblerRISCV64Test, Sra) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sra, "sra {reg1}, {reg2}, {reg3}"), "Sra"); +} + +TEST_F(AssemblerRISCV64Test, Addiw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Addiw, -12, "addiw {reg1}, {reg2}, {imm}"), "Addiw"); +} + +TEST_F(AssemblerRISCV64Test, Slliw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Slliw, 5, "slliw {reg1}, {reg2}, {imm}"), "Slliw"); +} + +TEST_F(AssemblerRISCV64Test, Srliw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Srliw, 5, "srliw {reg1}, {reg2}, {imm}"), "Srliw"); +} + +TEST_F(AssemblerRISCV64Test, Sraiw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Sraiw, 5, "sraiw {reg1}, {reg2}, {imm}"), "Sraiw"); +} + +TEST_F(AssemblerRISCV64Test, Addw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Addw, "addw {reg1}, {reg2}, {reg3}"), "Addw"); +} + +TEST_F(AssemblerRISCV64Test, Subw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Subw, "subw {reg1}, {reg2}, {reg3}"), "Subw"); +} + +TEST_F(AssemblerRISCV64Test, Sllw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sllw, "sllw {reg1}, {reg2}, {reg3}"), "Sllw"); +} + +TEST_F(AssemblerRISCV64Test, Srlw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Srlw, "srlw {reg1}, {reg2}, {reg3}"), "Srlw"); +} + +TEST_F(AssemblerRISCV64Test, Sraw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sraw, "sraw {reg1}, {reg2}, {reg3}"), "Sraw"); +} + +TEST_F(AssemblerRISCV64Test, Ecall) { + __ Ecall(); + DriverStr("ecall\n", "Ecall"); +} + +TEST_F(AssemblerRISCV64Test, Ebreak) { + __ Ebreak(); + DriverStr("ebreak\n", "Ebreak"); +} + +TEST_F(AssemblerRISCV64Test, Fence) { + auto get_fence_type_string = [](uint32_t fence_type) { + CHECK_LE(fence_type, 0xfu); + std::string result; + if ((fence_type & kFenceInput) != 0u) { + result += 
"i"; + } + if ((fence_type & kFenceOutput) != 0u) { + result += "o"; + } + if ((fence_type & kFenceRead) != 0u) { + result += "r"; + } + if ((fence_type & kFenceWrite) != 0u) { + result += "w"; + } + if (result.empty()) { + result += "0"; + } + return result; + }; + + std::string expected; + // Note: The `pred` and `succ` are 4 bits each. + // Some combinations are not really useful but the assembler can emit them all. + for (uint32_t pred = 0u; pred != 0x10; ++pred) { + for (uint32_t succ = 0u; succ != 0x10; ++succ) { + __ Fence(pred, succ); + expected += + "fence " + get_fence_type_string(pred) + ", " + get_fence_type_string(succ) + "\n"; + } + } + DriverStr(expected, "Fence"); +} + +TEST_F(AssemblerRISCV64Test, FenceTso) { + __ FenceTso(); + DriverStr("fence.tso", "FenceTso"); +} + +TEST_F(AssemblerRISCV64Test, FenceI) { + __ FenceI(); + DriverStr("fence.i", "FenceI"); +} + +TEST_F(AssemblerRISCV64Test, Mul) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mul, "mul {reg1}, {reg2}, {reg3}"), "Mul"); +} + +TEST_F(AssemblerRISCV64Test, Mulh) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mulh, "mulh {reg1}, {reg2}, {reg3}"), "Mulh"); +} + +TEST_F(AssemblerRISCV64Test, Mulhsu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mulhsu, "mulhsu {reg1}, {reg2}, {reg3}"), "Mulhsu"); +} + +TEST_F(AssemblerRISCV64Test, Mulhu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mulhu, "mulhu {reg1}, {reg2}, {reg3}"), "Mulhu"); +} + +TEST_F(AssemblerRISCV64Test, Div) { + DriverStr(RepeatRRR(&Riscv64Assembler::Div, "div {reg1}, {reg2}, {reg3}"), "Div"); +} + +TEST_F(AssemblerRISCV64Test, Divu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Divu, "divu {reg1}, {reg2}, {reg3}"), "Divu"); +} + +TEST_F(AssemblerRISCV64Test, Rem) { + DriverStr(RepeatRRR(&Riscv64Assembler::Rem, "rem {reg1}, {reg2}, {reg3}"), "Rem"); +} + +TEST_F(AssemblerRISCV64Test, Remu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Remu, "remu {reg1}, {reg2}, {reg3}"), "Remu"); +} + +TEST_F(AssemblerRISCV64Test, Mulw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Mulw, "mulw {reg1}, {reg2}, {reg3}"), "Mulw"); +} + +TEST_F(AssemblerRISCV64Test, Divw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Divw, "divw {reg1}, {reg2}, {reg3}"), "Divw"); +} + +TEST_F(AssemblerRISCV64Test, Divuw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Divuw, "divuw {reg1}, {reg2}, {reg3}"), "Divuw"); +} + +TEST_F(AssemblerRISCV64Test, Remw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Remw, "remw {reg1}, {reg2}, {reg3}"), "Remw"); +} + +TEST_F(AssemblerRISCV64Test, Remuw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Remuw, "remuw {reg1}, {reg2}, {reg3}"), "Remuw"); +} + +TEST_F(AssemblerRISCV64Test, LrW) { + auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kRelease; }; + DriverStr(RepeatRRAqRl(&Riscv64Assembler::LrW, "lr.w{aqrl} {reg1}, ({reg2})", invalid_aqrl), + "LrW"); +} + +TEST_F(AssemblerRISCV64Test, LrD) { + auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kRelease; }; + DriverStr(RepeatRRAqRl(&Riscv64Assembler::LrD, "lr.d{aqrl} {reg1}, ({reg2})", invalid_aqrl), + "LrD"); +} + +TEST_F(AssemblerRISCV64Test, ScW) { + auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kAcquire; }; + DriverStr( + RepeatRRRAqRl(&Riscv64Assembler::ScW, "sc.w{aqrl} {reg1}, {reg2}, ({reg3})", invalid_aqrl), + "ScW"); +} + +TEST_F(AssemblerRISCV64Test, ScD) { + auto invalid_aqrl = [](AqRl aqrl) { return aqrl == AqRl::kAcquire; }; + DriverStr( + RepeatRRRAqRl(&Riscv64Assembler::ScD, "sc.d{aqrl} {reg1}, {reg2}, ({reg3})", invalid_aqrl), + "ScD"); +} + +TEST_F(AssemblerRISCV64Test, AmoSwapW) { + 
DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoSwapW, "amoswap.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoSwapW"); +} + +TEST_F(AssemblerRISCV64Test, AmoSwapD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoSwapD, "amoswap.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoSwapD"); +} + +TEST_F(AssemblerRISCV64Test, AmoAddW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAddW, "amoadd.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoAddW"); +} + +TEST_F(AssemblerRISCV64Test, AmoAddD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAddD, "amoadd.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoAddD"); +} + +TEST_F(AssemblerRISCV64Test, AmoXorW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoXorW, "amoxor.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoXorW"); +} + +TEST_F(AssemblerRISCV64Test, AmoXorD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoXorD, "amoxor.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoXorD"); +} + +TEST_F(AssemblerRISCV64Test, AmoAndW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAndW, "amoand.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoAndW"); +} + +TEST_F(AssemblerRISCV64Test, AmoAndD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoAndD, "amoand.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoAndD"); +} + +TEST_F(AssemblerRISCV64Test, AmoOrW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoOrW, "amoor.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoOrW"); +} + +TEST_F(AssemblerRISCV64Test, AmoOrD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoOrD, "amoor.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoOrD"); +} + +TEST_F(AssemblerRISCV64Test, AmoMinW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinW, "amomin.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMinW"); +} + +TEST_F(AssemblerRISCV64Test, AmoMinD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinD, "amomin.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMinD"); +} + +TEST_F(AssemblerRISCV64Test, AmoMaxW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxW, "amomax.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMaxW"); +} + +TEST_F(AssemblerRISCV64Test, AmoMaxD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxD, "amomax.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMaxD"); +} + +TEST_F(AssemblerRISCV64Test, AmoMinuW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinuW, "amominu.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMinuW"); +} + +TEST_F(AssemblerRISCV64Test, AmoMinuD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMinuD, "amominu.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMinuD"); +} + +TEST_F(AssemblerRISCV64Test, AmoMaxuW) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxuW, "amomaxu.w{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMaxuW"); +} + +TEST_F(AssemblerRISCV64Test, AmoMaxuD) { + DriverStr(RepeatRRRAqRl(&Riscv64Assembler::AmoMaxuD, "amomaxu.d{aqrl} {reg1}, {reg2}, ({reg3})"), + "AmoMaxuD"); +} + +TEST_F(AssemblerRISCV64Test, Csrrw) { + DriverStr(RepeatCsrrX(&Riscv64Assembler::Csrrw, "csrrw {reg1}, {csr}, {reg2}"), "Csrrw"); +} + +TEST_F(AssemblerRISCV64Test, Csrrs) { + DriverStr(RepeatCsrrX(&Riscv64Assembler::Csrrs, "csrrs {reg1}, {csr}, {reg2}"), "Csrrs"); +} + +TEST_F(AssemblerRISCV64Test, Csrrc) { + DriverStr(RepeatCsrrX(&Riscv64Assembler::Csrrc, "csrrc {reg1}, {csr}, {reg2}"), "Csrrc"); +} + +TEST_F(AssemblerRISCV64Test, Csrrwi) { + DriverStr(RepeatCsrrXi(&Riscv64Assembler::Csrrwi, "csrrwi {reg}, {csr}, {uimm}"), "Csrrwi"); +} + +TEST_F(AssemblerRISCV64Test, Csrrsi) { + DriverStr(RepeatCsrrXi(&Riscv64Assembler::Csrrsi, "csrrsi {reg}, {csr}, {uimm}"), "Csrrsi"); +} + +TEST_F(AssemblerRISCV64Test, Csrrci) { + 
DriverStr(RepeatCsrrXi(&Riscv64Assembler::Csrrci, "csrrci {reg}, {csr}, {uimm}"), "Csrrci"); +} + +TEST_F(AssemblerRISCV64Test, FLw) { + DriverStr(RepeatFRIb(&Riscv64Assembler::FLw, -12, "flw {reg1}, {imm}({reg2})"), "FLw"); +} + +TEST_F(AssemblerRISCV64Test, FLd) { + DriverStr(RepeatFRIb(&Riscv64Assembler::FLd, -12, "fld {reg1}, {imm}({reg2})"), "FLw"); +} + +TEST_F(AssemblerRISCV64Test, FSw) { + DriverStr(RepeatFRIb(&Riscv64Assembler::FSw, 2, "fsw {reg1}, {imm}({reg2})"), "FSw"); +} + +TEST_F(AssemblerRISCV64Test, FSd) { + DriverStr(RepeatFRIb(&Riscv64Assembler::FSd, 2, "fsd {reg1}, {imm}({reg2})"), "FSd"); +} + +TEST_F(AssemblerRISCV64Test, FMAddS) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMAddS, + "fmadd.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMAddS"); +} + +TEST_F(AssemblerRISCV64Test, FMAddS_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FMAddS, "fmadd.s {reg1}, {reg2}, {reg3}, {reg4}"), + "FMAddS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMAddD) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMAddD, + "fmadd.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMAddD"); +} + +TEST_F(AssemblerRISCV64Test, FMAddD_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FMAddD, "fmadd.d {reg1}, {reg2}, {reg3}, {reg4}"), + "FMAddD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMSubS) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMSubS, + "fmsub.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMSubS"); +} + +TEST_F(AssemblerRISCV64Test, FMSubS_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FMSubS, "fmsub.s {reg1}, {reg2}, {reg3}, {reg4}"), + "FMSubS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMSubD) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FMSubD, + "fmsub.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FMSubD"); +} + +TEST_F(AssemblerRISCV64Test, FMSubD_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FMSubD, "fmsub.d {reg1}, {reg2}, {reg3}, {reg4}"), + "FMSubD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FNMSubS) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMSubS, + "fnmsub.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMSubS"); +} + +TEST_F(AssemblerRISCV64Test, FNMSubS_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FNMSubS, "fnmsub.s {reg1}, {reg2}, {reg3}, {reg4}"), + "FNMSubS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FNMSubD) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMSubD, + "fnmsub.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMSubD"); +} + +TEST_F(AssemblerRISCV64Test, FNMSubD_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FNMSubD, "fnmsub.d {reg1}, {reg2}, {reg3}, {reg4}"), + "FNMSubD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FNMAddS) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMAddS, + "fnmadd.s {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMAddS"); +} + +TEST_F(AssemblerRISCV64Test, FNMAddS_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FNMAddS, "fnmadd.s {reg1}, {reg2}, {reg3}, {reg4}"), + "FNMAddS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FNMAddD) { + DriverStr(RepeatFFFFRoundingMode(&Riscv64Assembler::FNMAddD, + "fnmadd.d {reg1}, {reg2}, {reg3}, {reg4}, {rm}"), "FNMAddD"); +} + +TEST_F(AssemblerRISCV64Test, FNMAddD_Default) { + DriverStr(RepeatFFFF(&Riscv64Assembler::FNMAddD, "fnmadd.d {reg1}, {reg2}, {reg3}, {reg4}"), + "FNMAddD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FAddS) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FAddS, "fadd.s {reg1}, {reg2}, {reg3}, {rm}"), + "FAddS"); +} + +TEST_F(AssemblerRISCV64Test, FAddS_Default) { + 
DriverStr(RepeatFFF(&Riscv64Assembler::FAddS, "fadd.s {reg1}, {reg2}, {reg3}"), "FAddS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FAddD) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FAddD, "fadd.d {reg1}, {reg2}, {reg3}, {rm}"), + "FAddD"); +} + +TEST_F(AssemblerRISCV64Test, FAddD_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FAddD, "fadd.d {reg1}, {reg2}, {reg3}"), "FAddD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSubS) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FSubS, "fsub.s {reg1}, {reg2}, {reg3}, {rm}"), + "FSubS"); +} + +TEST_F(AssemblerRISCV64Test, FSubS_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSubS, "fsub.s {reg1}, {reg2}, {reg3}"), "FSubS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSubD) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FSubD, "fsub.d {reg1}, {reg2}, {reg3}, {rm}"), + "FSubD"); +} + +TEST_F(AssemblerRISCV64Test, FSubD_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSubD, "fsub.d {reg1}, {reg2}, {reg3}"), "FSubD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMulS) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FMulS, "fmul.s {reg1}, {reg2}, {reg3}, {rm}"), + "FMulS"); +} + +TEST_F(AssemblerRISCV64Test, FMulS_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMulS, "fmul.s {reg1}, {reg2}, {reg3}"), "FMulS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMulD) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FMulD, "fmul.d {reg1}, {reg2}, {reg3}, {rm}"), + "FMulD"); +} + +TEST_F(AssemblerRISCV64Test, FMulD_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMulD, "fmul.d {reg1}, {reg2}, {reg3}"), "FMulD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FDivS) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FDivS, "fdiv.s {reg1}, {reg2}, {reg3}, {rm}"), + "FDivS"); +} + +TEST_F(AssemblerRISCV64Test, FDivS_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FDivS, "fdiv.s {reg1}, {reg2}, {reg3}"), "FDivS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FDivD) { + DriverStr(RepeatFFFRoundingMode(&Riscv64Assembler::FDivD, "fdiv.d {reg1}, {reg2}, {reg3}, {rm}"), + "FDivD"); +} + +TEST_F(AssemblerRISCV64Test, FDivD_Default) { + DriverStr(RepeatFFF(&Riscv64Assembler::FDivD, "fdiv.d {reg1}, {reg2}, {reg3}"), "FDivD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSqrtS) { + DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FSqrtS, "fsqrt.s {reg1}, {reg2}, {rm}"), + "FSqrtS"); +} + +TEST_F(AssemblerRISCV64Test, FSqrtS_Default) { + DriverStr(RepeatFF(&Riscv64Assembler::FSqrtS, "fsqrt.s {reg1}, {reg2}"), "FSqrtS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSqrtD) { + DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FSqrtD, "fsqrt.d {reg1}, {reg2}, {rm}"), + "FSqrtD"); +} + +TEST_F(AssemblerRISCV64Test, FSqrtD_Default) { + DriverStr(RepeatFF(&Riscv64Assembler::FSqrtD, "fsqrt.d {reg1}, {reg2}"), "FSqrtD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjS, "fsgnj.s {reg1}, {reg2}, {reg3}"), "FSgnjS"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjD, "fsgnj.d {reg1}, {reg2}, {reg3}"), "FSgnjD"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjnS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjnS, "fsgnjn.s {reg1}, {reg2}, {reg3}"), "FSgnjnS"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjnD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjnD, "fsgnjn.d {reg1}, {reg2}, {reg3}"), "FSgnjnD"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjxS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjxS, "fsgnjx.s {reg1}, {reg2}, {reg3}"), 
"FSgnjxS"); +} + +TEST_F(AssemblerRISCV64Test, FSgnjxD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FSgnjxD, "fsgnjx.d {reg1}, {reg2}, {reg3}"), "FSgnjxD"); +} + +TEST_F(AssemblerRISCV64Test, FMinS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMinS, "fmin.s {reg1}, {reg2}, {reg3}"), "FMinS"); +} + +TEST_F(AssemblerRISCV64Test, FMinD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMinD, "fmin.d {reg1}, {reg2}, {reg3}"), "FMinD"); +} + +TEST_F(AssemblerRISCV64Test, FMaxS) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMaxS, "fmax.s {reg1}, {reg2}, {reg3}"), "FMaxS"); +} + +TEST_F(AssemblerRISCV64Test, FMaxD) { + DriverStr(RepeatFFF(&Riscv64Assembler::FMaxD, "fmax.d {reg1}, {reg2}, {reg3}"), "FMaxD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSD) { + DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FCvtSD, "fcvt.s.d {reg1}, {reg2}, {rm}"), + "FCvtSD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSD_Default) { + DriverStr(RepeatFF(&Riscv64Assembler::FCvtSD, "fcvt.s.d {reg1}, {reg2}"), "FCvtSD_Default"); +} + +// This conversion is lossless, so the rounding mode is meaningless and the assembler we're +// testing against does not even accept the rounding mode argument, so this test is disabled. +TEST_F(AssemblerRISCV64Test, DISABLED_FCvtDS) { + DriverStr(RepeatFFRoundingMode(&Riscv64Assembler::FCvtDS, "fcvt.d.s {reg1}, {reg2}, {rm}"), + "FCvtDS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDS_Default) { + DriverStr(RepeatFF(&Riscv64Assembler::FCvtDS, "fcvt.d.s {reg1}, {reg2}"), "FCvtDS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FEqS) { + DriverStr(RepeatRFF(&Riscv64Assembler::FEqS, "feq.s {reg1}, {reg2}, {reg3}"), "FEqS"); +} + +TEST_F(AssemblerRISCV64Test, FEqD) { + DriverStr(RepeatRFF(&Riscv64Assembler::FEqD, "feq.d {reg1}, {reg2}, {reg3}"), "FEqD"); +} + +TEST_F(AssemblerRISCV64Test, FLtS) { + DriverStr(RepeatRFF(&Riscv64Assembler::FLtS, "flt.s {reg1}, {reg2}, {reg3}"), "FLtS"); +} + +TEST_F(AssemblerRISCV64Test, FLtD) { + DriverStr(RepeatRFF(&Riscv64Assembler::FLtD, "flt.d {reg1}, {reg2}, {reg3}"), "FLtD"); +} + +TEST_F(AssemblerRISCV64Test, FLeS) { + DriverStr(RepeatRFF(&Riscv64Assembler::FLeS, "fle.s {reg1}, {reg2}, {reg3}"), "FLeS"); +} + +TEST_F(AssemblerRISCV64Test, FLeD) { + DriverStr(RepeatRFF(&Riscv64Assembler::FLeD, "fle.d {reg1}, {reg2}, {reg3}"), "FLeD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWS) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWS, "fcvt.w.s {reg1}, {reg2}, {rm}"), + "FCvtWS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWS_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtWS, "fcvt.w.s {reg1}, {reg2}"), "FCvtWS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWD) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWD, "fcvt.w.d {reg1}, {reg2}, {rm}"), + "FCvtWD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWD_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtWD, "fcvt.w.d {reg1}, {reg2}"), "FCvtWD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWuS) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWuS, "fcvt.wu.s {reg1}, {reg2}, {rm}"), + "FCvtWuS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWuS_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtWuS, "fcvt.wu.s {reg1}, {reg2}"), "FCvtWuS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWuD) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtWuD, "fcvt.wu.d {reg1}, {reg2}, {rm}"), + "FCvtWuD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtWuD_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtWuD, "fcvt.wu.d {reg1}, {reg2}"), "FCvtWuD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLS) { + 
DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLS, "fcvt.l.s {reg1}, {reg2}, {rm}"), + "FCvtLS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLS_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtLS, "fcvt.l.s {reg1}, {reg2}"), "FCvtLS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLD) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLD, "fcvt.l.d {reg1}, {reg2}, {rm}"), + "FCvtLD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLD_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtLD, "fcvt.l.d {reg1}, {reg2}"), "FCvtLD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLuS) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLuS, "fcvt.lu.s {reg1}, {reg2}, {rm}"), + "FCvtLuS"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLuS_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtLuS, "fcvt.lu.s {reg1}, {reg2}"), "FCvtLuS_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLuD) { + DriverStr(RepeatrFRoundingMode(&Riscv64Assembler::FCvtLuD, "fcvt.lu.d {reg1}, {reg2}, {rm}"), + "FCvtLuD"); +} + +TEST_F(AssemblerRISCV64Test, FCvtLuD_Default) { + DriverStr(RepeatrF(&Riscv64Assembler::FCvtLuD, "fcvt.lu.d {reg1}, {reg2}"), "FCvtLuD_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSW) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSW, "fcvt.s.w {reg1}, {reg2}, {rm}"), + "FCvtSW"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSW_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtSW, "fcvt.s.w {reg1}, {reg2}"), "FCvtSW_Default"); +} + +// This conversion is lossless, so the rounding mode is meaningless and the assembler we're +// testing against does not even accept the rounding mode argument, so this test is disabled. +TEST_F(AssemblerRISCV64Test, DISABLED_FCvtDW) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDW, "fcvt.d.w {reg1}, {reg2}, {rm}"), + "FCvtDW"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDW_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtDW, "fcvt.d.w {reg1}, {reg2}"), "FCvtDW_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSWu) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSWu, "fcvt.s.wu {reg1}, {reg2}, {rm}"), + "FCvtSWu"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSWu_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtSWu, "fcvt.s.wu {reg1}, {reg2}"), "FCvtSWu_Default"); +} + +// This conversion is lossless, so the rounding mode is meaningless and the assembler we're +// testing against does not even accept the rounding mode argument, so this test is disabled. 
+TEST_F(AssemblerRISCV64Test, DISABLED_FCvtDWu) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDWu, "fcvt.d.wu {reg1}, {reg2}, {rm}"), + "FCvtDWu"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDWu_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtDWu, "fcvt.d.wu {reg1}, {reg2}"), "FCvtDWu_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSL) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSL, "fcvt.s.l {reg1}, {reg2}, {rm}"), + "FCvtSL"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSL_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtSL, "fcvt.s.l {reg1}, {reg2}"), "FCvtSL_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDL) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDL, "fcvt.d.l {reg1}, {reg2}, {rm}"), + "FCvtDL"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDL_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtDL, "fcvt.d.l {reg1}, {reg2}"), "FCvtDL_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSLu) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtSLu, "fcvt.s.lu {reg1}, {reg2}, {rm}"), + "FCvtSLu"); +} + +TEST_F(AssemblerRISCV64Test, FCvtSLu_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtSLu, "fcvt.s.lu {reg1}, {reg2}"), "FCvtSLu_Default"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDLu) { + DriverStr(RepeatFrRoundingMode(&Riscv64Assembler::FCvtDLu, "fcvt.d.lu {reg1}, {reg2}, {rm}"), + "FCvtDLu"); +} + +TEST_F(AssemblerRISCV64Test, FCvtDLu_Default) { + DriverStr(RepeatFr(&Riscv64Assembler::FCvtDLu, "fcvt.d.lu {reg1}, {reg2}"), "FCvtDLu_Default"); +} + +TEST_F(AssemblerRISCV64Test, FMvXW) { + DriverStr(RepeatRF(&Riscv64Assembler::FMvXW, "fmv.x.w {reg1}, {reg2}"), "FMvXW"); +} + +TEST_F(AssemblerRISCV64Test, FMvXD) { + DriverStr(RepeatRF(&Riscv64Assembler::FMvXD, "fmv.x.d {reg1}, {reg2}"), "FMvXD"); +} + +TEST_F(AssemblerRISCV64Test, FMvWX) { + DriverStr(RepeatFR(&Riscv64Assembler::FMvWX, "fmv.w.x {reg1}, {reg2}"), "FMvWX"); +} + +TEST_F(AssemblerRISCV64Test, FMvDX) { + DriverStr(RepeatFR(&Riscv64Assembler::FMvDX, "fmv.d.x {reg1}, {reg2}"), "FMvDX"); +} + +TEST_F(AssemblerRISCV64Test, FClassS) { + DriverStr(RepeatRF(&Riscv64Assembler::FClassS, "fclass.s {reg1}, {reg2}"), "FClassS"); +} + +TEST_F(AssemblerRISCV64Test, FClassD) { + DriverStr(RepeatrF(&Riscv64Assembler::FClassD, "fclass.d {reg1}, {reg2}"), "FClassD"); +} + +TEST_F(AssemblerRISCV64Test, AddUw) { + DriverStr(RepeatRRR(&Riscv64Assembler::AddUw, "add.uw {reg1}, {reg2}, {reg3}"), "AddUw"); +} + +TEST_F(AssemblerRISCV64Test, Sh1Add) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh1Add, "sh1add {reg1}, {reg2}, {reg3}"), "Sh1Add"); +} + +TEST_F(AssemblerRISCV64Test, Sh1AddUw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh1AddUw, "sh1add.uw {reg1}, {reg2}, {reg3}"), "Sh1AddUw"); +} + +TEST_F(AssemblerRISCV64Test, Sh2Add) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh2Add, "sh2add {reg1}, {reg2}, {reg3}"), "Sh2Add"); +} + +TEST_F(AssemblerRISCV64Test, Sh2AddUw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh2AddUw, "sh2add.uw {reg1}, {reg2}, {reg3}"), "Sh2AddUw"); +} + +TEST_F(AssemblerRISCV64Test, Sh3Add) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh3Add, "sh3add {reg1}, {reg2}, {reg3}"), "Sh3Add"); +} + +TEST_F(AssemblerRISCV64Test, Sh3AddUw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Sh3AddUw, "sh3add.uw {reg1}, {reg2}, {reg3}"), "Sh3AddUw"); +} + +TEST_F(AssemblerRISCV64Test, SlliUw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::SlliUw, 6, "slli.uw {reg1}, {reg2}, {imm}"), "SlliUw"); +} + +TEST_F(AssemblerRISCV64Test, Andn) { + DriverStr(RepeatRRR(&Riscv64Assembler::Andn, "andn {reg1}, 
{reg2}, {reg3}"), "Andn"); +} + +TEST_F(AssemblerRISCV64Test, Orn) { + DriverStr(RepeatRRR(&Riscv64Assembler::Orn, "orn {reg1}, {reg2}, {reg3}"), "Orn"); +} + +TEST_F(AssemblerRISCV64Test, Xnor) { + DriverStr(RepeatRRR(&Riscv64Assembler::Xnor, "xnor {reg1}, {reg2}, {reg3}"), "Xnor"); +} + +TEST_F(AssemblerRISCV64Test, Clz) { + DriverStr(RepeatRR(&Riscv64Assembler::Clz, "clz {reg1}, {reg2}"), "Clz"); +} + +TEST_F(AssemblerRISCV64Test, Clzw) { + DriverStr(RepeatRR(&Riscv64Assembler::Clzw, "clzw {reg1}, {reg2}"), "Clzw"); +} + +TEST_F(AssemblerRISCV64Test, Ctz) { + DriverStr(RepeatRR(&Riscv64Assembler::Ctz, "ctz {reg1}, {reg2}"), "Ctz"); +} + +TEST_F(AssemblerRISCV64Test, Ctzw) { + DriverStr(RepeatRR(&Riscv64Assembler::Ctzw, "ctzw {reg1}, {reg2}"), "Ctzw"); +} + +TEST_F(AssemblerRISCV64Test, Cpop) { + DriverStr(RepeatRR(&Riscv64Assembler::Cpop, "cpop {reg1}, {reg2}"), "Cpop"); +} + +TEST_F(AssemblerRISCV64Test, Cpopw) { + DriverStr(RepeatRR(&Riscv64Assembler::Cpopw, "cpopw {reg1}, {reg2}"), "Cpopw"); +} + +TEST_F(AssemblerRISCV64Test, Min) { + DriverStr(RepeatRRR(&Riscv64Assembler::Min, "min {reg1}, {reg2}, {reg3}"), "Min"); +} + +TEST_F(AssemblerRISCV64Test, Minu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Minu, "minu {reg1}, {reg2}, {reg3}"), "Minu"); +} + +TEST_F(AssemblerRISCV64Test, Max) { + DriverStr(RepeatRRR(&Riscv64Assembler::Max, "max {reg1}, {reg2}, {reg3}"), "Max"); +} + +TEST_F(AssemblerRISCV64Test, Maxu) { + DriverStr(RepeatRRR(&Riscv64Assembler::Maxu, "maxu {reg1}, {reg2}, {reg3}"), "Maxu"); +} + +TEST_F(AssemblerRISCV64Test, Rol) { + DriverStr(RepeatRRR(&Riscv64Assembler::Rol, "rol {reg1}, {reg2}, {reg3}"), "Rol"); +} + +TEST_F(AssemblerRISCV64Test, Rolw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Rolw, "rolw {reg1}, {reg2}, {reg3}"), "Rolw"); +} + +TEST_F(AssemblerRISCV64Test, Ror) { + DriverStr(RepeatRRR(&Riscv64Assembler::Ror, "ror {reg1}, {reg2}, {reg3}"), "Ror"); +} + +TEST_F(AssemblerRISCV64Test, Rorw) { + DriverStr(RepeatRRR(&Riscv64Assembler::Rorw, "rorw {reg1}, {reg2}, {reg3}"), "Rorw"); +} + +TEST_F(AssemblerRISCV64Test, Rori) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Rori, 6, "rori {reg1}, {reg2}, {imm}"), "Rori"); +} + +TEST_F(AssemblerRISCV64Test, Roriw) { + DriverStr(RepeatRRIb(&Riscv64Assembler::Roriw, 5, "roriw {reg1}, {reg2}, {imm}"), "Roriw"); +} + +TEST_F(AssemblerRISCV64Test, OrcB) { + DriverStr(RepeatRR(&Riscv64Assembler::OrcB, "orc.b {reg1}, {reg2}"), "OrcB"); +} + +TEST_F(AssemblerRISCV64Test, Rev8) { + DriverStr(RepeatRR(&Riscv64Assembler::Rev8, "rev8 {reg1}, {reg2}"), "Rev8"); +} + +// Pseudo instructions. +TEST_F(AssemblerRISCV64Test, Nop) { + __ Nop(); + DriverStr("addi zero,zero,0", "Nop"); +} + +TEST_F(AssemblerRISCV64Test, Li) { + SetUseSimpleMarch(true); + TestLoadConst64("Li", + /*can_use_tmp=*/ false, + [&](XRegister rd, int64_t value) { __ Li(rd, value); }); +} + +TEST_F(AssemblerRISCV64Test, Mv) { + DriverStr(RepeatRR(&Riscv64Assembler::Mv, "addi {reg1}, {reg2}, 0"), "Mv"); +} + +TEST_F(AssemblerRISCV64Test, Not) { + DriverStr(RepeatRR(&Riscv64Assembler::Not, "xori {reg1}, {reg2}, -1"), "Not"); +} + +TEST_F(AssemblerRISCV64Test, Neg) { + DriverStr(RepeatRR(&Riscv64Assembler::Neg, "sub {reg1}, x0, {reg2}"), "Neg"); +} + +TEST_F(AssemblerRISCV64Test, NegW) { + DriverStr(RepeatRR(&Riscv64Assembler::NegW, "subw {reg1}, x0, {reg2}"), "Neg"); +} + +TEST_F(AssemblerRISCV64Test, SextB) { + // Note: SEXT.B from the Zbb extension is not supported. 
+ DriverStr(RepeatRR(&Riscv64Assembler::SextB, + "slli {reg1}, {reg2}, 56\n" + "srai {reg1}, {reg1}, 56"), + "SextB"); +} + +TEST_F(AssemblerRISCV64Test, SextH) { + // Note: SEXT.H from the Zbb extension is not supported. + DriverStr(RepeatRR(&Riscv64Assembler::SextH, + "slli {reg1}, {reg2}, 48\n" + "srai {reg1}, {reg1}, 48"), + "SextH"); +} + +TEST_F(AssemblerRISCV64Test, SextW) { + DriverStr(RepeatRR(&Riscv64Assembler::SextW, "addiw {reg1}, {reg2}, 0\n"), "SextW"); +} + +TEST_F(AssemblerRISCV64Test, ZextB) { + DriverStr(RepeatRR(&Riscv64Assembler::ZextB, "andi {reg1}, {reg2}, 255"), "ZextB"); +} + +TEST_F(AssemblerRISCV64Test, ZextH) { + // Note: ZEXT.H from the Zbb extension is not supported. + DriverStr(RepeatRR(&Riscv64Assembler::ZextH, + "slli {reg1}, {reg2}, 48\n" + "srli {reg1}, {reg1}, 48"), + "SextH"); +} + +TEST_F(AssemblerRISCV64Test, ZextW) { + DriverStr(RepeatRR(&Riscv64Assembler::ZextW, + "slli {reg1}, {reg2}, 32\n" + "srli {reg1}, {reg1}, 32"), + "ZextW"); +} + +TEST_F(AssemblerRISCV64Test, Seqz) { + DriverStr(RepeatRR(&Riscv64Assembler::Seqz, "sltiu {reg1}, {reg2}, 1\n"), "Seqz"); +} + +TEST_F(AssemblerRISCV64Test, Snez) { + DriverStr(RepeatRR(&Riscv64Assembler::Snez, "sltu {reg1}, zero, {reg2}\n"), "Snez"); +} + +TEST_F(AssemblerRISCV64Test, Sltz) { + DriverStr(RepeatRR(&Riscv64Assembler::Sltz, "slt {reg1}, {reg2}, zero\n"), "Sltz"); +} + +TEST_F(AssemblerRISCV64Test, Sgtz) { + DriverStr(RepeatRR(&Riscv64Assembler::Sgtz, "slt {reg1}, zero, {reg2}\n"), "Sgtz"); +} + +TEST_F(AssemblerRISCV64Test, FMvS) { + DriverStr(RepeatFF(&Riscv64Assembler::FMvS, "fsgnj.s {reg1}, {reg2}, {reg2}\n"), "FMvS"); +} + +TEST_F(AssemblerRISCV64Test, FAbsS) { + DriverStr(RepeatFF(&Riscv64Assembler::FAbsS, "fsgnjx.s {reg1}, {reg2}, {reg2}\n"), "FAbsS"); +} + +TEST_F(AssemblerRISCV64Test, FNegS) { + DriverStr(RepeatFF(&Riscv64Assembler::FNegS, "fsgnjn.s {reg1}, {reg2}, {reg2}\n"), "FNegS"); +} + +TEST_F(AssemblerRISCV64Test, FMvD) { + DriverStr(RepeatFF(&Riscv64Assembler::FMvD, "fsgnj.d {reg1}, {reg2}, {reg2}\n"), "FMvD"); +} + +TEST_F(AssemblerRISCV64Test, FAbsD) { + DriverStr(RepeatFF(&Riscv64Assembler::FAbsD, "fsgnjx.d {reg1}, {reg2}, {reg2}\n"), "FAbsD"); +} + +TEST_F(AssemblerRISCV64Test, FNegD) { + DriverStr(RepeatFF(&Riscv64Assembler::FNegD, "fsgnjn.d {reg1}, {reg2}, {reg2}\n"), "FNegD"); +} + +TEST_F(AssemblerRISCV64Test, Beqz) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRIbS(&Riscv64Assembler::Beqz, -11, 2, "beq {reg}, zero, {imm}\n"), "Beqz"); +} + +TEST_F(AssemblerRISCV64Test, Bnez) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRIbS(&Riscv64Assembler::Bnez, -11, 2, "bne {reg}, zero, {imm}\n"), "Bnez"); +} + +TEST_F(AssemblerRISCV64Test, Blez) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRIbS(&Riscv64Assembler::Blez, -11, 2, "bge zero, {reg}, {imm}\n"), "Blez"); +} + +TEST_F(AssemblerRISCV64Test, Bgez) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRIbS(&Riscv64Assembler::Bgez, -11, 2, "bge {reg}, zero, {imm}\n"), "Bgez"); +} + +TEST_F(AssemblerRISCV64Test, Bltz) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRIbS(&Riscv64Assembler::Bltz, -11, 2, "blt {reg}, zero, {imm}\n"), "Bltz"); +} + +TEST_F(AssemblerRISCV64Test, Bgtz) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. 
+ DriverStr(RepeatRIbS(&Riscv64Assembler::Bgtz, -11, 2, "blt zero, {reg}, {imm}\n"), "Bgtz"); +} + +TEST_F(AssemblerRISCV64Test, Bgt) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bgt, -11, 2, "blt {reg2}, {reg1}, {imm}\n"), "Bgt"); +} + +TEST_F(AssemblerRISCV64Test, Ble) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Ble, -11, 2, "bge {reg2}, {reg1}, {imm}\n"), "Bge"); +} + +TEST_F(AssemblerRISCV64Test, Bgtu) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bgtu, -11, 2, "bltu {reg2}, {reg1}, {imm}\n"), "Bgtu"); +} + +TEST_F(AssemblerRISCV64Test, Bleu) { + // TODO(riscv64): Change "-11, 2" to "-12, 1" for "C" Standard Extension. + DriverStr(RepeatRRIbS(&Riscv64Assembler::Bleu, -11, 2, "bgeu {reg2}, {reg1}, {imm}\n"), "Bgeu"); +} + +TEST_F(AssemblerRISCV64Test, J) { + // TODO(riscv64): Change "-19, 2" to "-20, 1" for "C" Standard Extension. + DriverStr(RepeatIbS<int32_t>(&Riscv64Assembler::J, -19, 2, "j {imm}\n"), "J"); +} + +TEST_F(AssemblerRISCV64Test, JalRA) { + // TODO(riscv64): Change "-19, 2" to "-20, 1" for "C" Standard Extension. + DriverStr(RepeatIbS<int32_t>(&Riscv64Assembler::Jal, -19, 2, "jal {imm}\n"), "JalRA"); +} + +TEST_F(AssemblerRISCV64Test, Jr) { + DriverStr(RepeatR(&Riscv64Assembler::Jr, "jr {reg}\n"), "Jr"); +} + +TEST_F(AssemblerRISCV64Test, JalrRA) { + DriverStr(RepeatR(&Riscv64Assembler::Jalr, "jalr {reg}\n"), "JalrRA"); +} + +TEST_F(AssemblerRISCV64Test, Jalr0) { + DriverStr(RepeatRR(&Riscv64Assembler::Jalr, "jalr {reg1}, {reg2}\n"), "Jalr0"); +} + +TEST_F(AssemblerRISCV64Test, Ret) { + __ Ret(); + DriverStr("ret\n", "Ret"); +} + +TEST_F(AssemblerRISCV64Test, RdCycle) { + DriverStr(RepeatR(&Riscv64Assembler::RdCycle, "rdcycle {reg}\n"), "RdCycle"); +} + +TEST_F(AssemblerRISCV64Test, RdTime) { + DriverStr(RepeatR(&Riscv64Assembler::RdTime, "rdtime {reg}\n"), "RdTime"); +} + +TEST_F(AssemblerRISCV64Test, RdInstret) { + DriverStr(RepeatR(&Riscv64Assembler::RdInstret, "rdinstret {reg}\n"), "RdInstret"); +} + +TEST_F(AssemblerRISCV64Test, Csrr) { + TestCsrrXMacro( + "Csrr", "csrr {reg}, {csr}", [&](uint32_t csr, XRegister rd) { __ Csrr(rd, csr); }); +} + +TEST_F(AssemblerRISCV64Test, Csrw) { + TestCsrrXMacro( + "Csrw", "csrw {csr}, {reg}", [&](uint32_t csr, XRegister rs) { __ Csrw(csr, rs); }); +} + +TEST_F(AssemblerRISCV64Test, Csrs) { + TestCsrrXMacro( + "Csrs", "csrs {csr}, {reg}", [&](uint32_t csr, XRegister rs) { __ Csrs(csr, rs); }); +} + +TEST_F(AssemblerRISCV64Test, Csrc) { + TestCsrrXMacro( + "Csrc", "csrc {csr}, {reg}", [&](uint32_t csr, XRegister rs) { __ Csrc(csr, rs); }); +} + +TEST_F(AssemblerRISCV64Test, Csrwi) { + TestCsrrXiMacro( + "Csrwi", "csrwi {csr}, {uimm}", [&](uint32_t csr, uint32_t uimm) { __ Csrwi(csr, uimm); }); +} + +TEST_F(AssemblerRISCV64Test, Csrsi) { + TestCsrrXiMacro( + "Csrsi", "csrsi {csr}, {uimm}", [&](uint32_t csr, uint32_t uimm) { __ Csrsi(csr, uimm); }); +} + +TEST_F(AssemblerRISCV64Test, Csrci) { + TestCsrrXiMacro( + "Csrci", "csrci {csr}, {uimm}", [&](uint32_t csr, uint32_t uimm) { __ Csrci(csr, uimm); }); +} + +TEST_F(AssemblerRISCV64Test, LoadConst32) { + // `LoadConst32()` emits the same code sequences as `Li()` for 32-bit values. 
+ ScratchRegisterScope srs(GetAssembler()); + srs.ExcludeXRegister(TMP); + srs.ExcludeXRegister(TMP2); + DriverStr(RepeatRIb(&Riscv64Assembler::LoadConst32, -32, "li {reg}, {imm}"), "LoadConst32"); +} + +TEST_F(AssemblerRISCV64Test, LoadConst64) { + SetUseSimpleMarch(true); + TestLoadConst64("LoadConst64", + /*can_use_tmp=*/ true, + [&](XRegister rd, int64_t value) { __ LoadConst64(rd, value); }); +} + +TEST_F(AssemblerRISCV64Test, AddConst32) { + auto emit_op = [&](XRegister rd, XRegister rs1, int64_t value) { + __ AddConst32(rd, rs1, dchecked_integral_cast<int32_t>(value)); + }; + TestAddConst("AddConst32", 32, /*suffix=*/ "w", emit_op); +} + +TEST_F(AssemblerRISCV64Test, AddConst64) { + SetUseSimpleMarch(true); + auto emit_op = [&](XRegister rd, XRegister rs1, int64_t value) { + __ AddConst64(rd, rs1, value); + }; + TestAddConst("AddConst64", 64, /*suffix=*/ "", emit_op); +} + +TEST_F(AssemblerRISCV64Test, BcondForward3KiB) { + TestBcondForward("BcondForward3KiB", 3 * KB, "1", GetPrintBcond()); +} + +TEST_F(AssemblerRISCV64Test, BcondBackward3KiB) { + TestBcondBackward("BcondBackward3KiB", 3 * KB, "1", GetPrintBcond()); +} + +TEST_F(AssemblerRISCV64Test, BcondForward5KiB) { + TestBcondForward("BcondForward5KiB", 5 * KB, "1", GetPrintBcondOppositeAndJ("2")); +} + +TEST_F(AssemblerRISCV64Test, BcondBackward5KiB) { + TestBcondBackward("BcondBackward5KiB", 5 * KB, "1", GetPrintBcondOppositeAndJ("2")); +} + +TEST_F(AssemblerRISCV64Test, BcondForward2MiB) { + TestBcondForward("BcondForward2MiB", 2 * MB, "1", GetPrintBcondOppositeAndTail("2", "3")); +} + +TEST_F(AssemblerRISCV64Test, BcondBackward2MiB) { + TestBcondBackward("BcondBackward2MiB", 2 * MB, "1", GetPrintBcondOppositeAndTail("2", "3")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset13Forward) { + TestBeqA0A1Forward("BeqA0A1MaxOffset13Forward", + MaxOffset13ForwardDistance() - /*BEQ*/ 4u, + "1", + GetPrintBcond()); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset13Backward) { + TestBeqA0A1Backward("BeqA0A1MaxOffset13Forward", + MaxOffset13BackwardDistance(), + "1", + GetPrintBcond()); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset13Forward) { + TestBeqA0A1Forward("BeqA0A1OverMaxOffset13Forward", + MaxOffset13ForwardDistance() - /*BEQ*/ 4u + /*Exceed max*/ 4u, + "1", + GetPrintBcondOppositeAndJ("2")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset13Backward) { + TestBeqA0A1Backward("BeqA0A1OverMaxOffset13Forward", + MaxOffset13BackwardDistance() + /*Exceed max*/ 4u, + "1", + GetPrintBcondOppositeAndJ("2")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset21Forward) { + TestBeqA0A1Forward("BeqA0A1MaxOffset21Forward", + MaxOffset21ForwardDistance() - /*J*/ 4u, + "1", + GetPrintBcondOppositeAndJ("2")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1MaxOffset21Backward) { + TestBeqA0A1Backward("BeqA0A1MaxOffset21Backward", + MaxOffset21BackwardDistance() - /*BNE*/ 4u, + "1", + GetPrintBcondOppositeAndJ("2")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset21Forward) { + TestBeqA0A1Forward("BeqA0A1OverMaxOffset21Forward", + MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u, + "1", + GetPrintBcondOppositeAndTail("2", "3")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1OverMaxOffset21Backward) { + TestBeqA0A1Backward("BeqA0A1OverMaxOffset21Backward", + MaxOffset21BackwardDistance() - /*BNE*/ 4u + /*Exceed max*/ 4u, + "1", + GetPrintBcondOppositeAndTail("2", "3")); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1AlmostCascade) { + TestBeqA0A1MaybeCascade("BeqA0A1AlmostCascade", /*cascade=*/ false, 
GetPrintBcond()); +} + +TEST_F(AssemblerRISCV64Test, BeqA0A1Cascade) { + TestBeqA0A1MaybeCascade( + "BeqA0A1AlmostCascade", /*cascade=*/ true, GetPrintBcondOppositeAndJ("1")); +} + +TEST_F(AssemblerRISCV64Test, BcondElimination) { + Riscv64Label label; + __ Bind(&label); + __ Nop(); + for (XRegister reg : GetRegisters()) { + __ Bne(reg, reg, &label); + __ Blt(reg, reg, &label); + __ Bgt(reg, reg, &label); + __ Bltu(reg, reg, &label); + __ Bgtu(reg, reg, &label); + } + DriverStr("nop\n", "BcondElimination"); +} + +TEST_F(AssemblerRISCV64Test, BcondUnconditional) { + Riscv64Label label; + __ Bind(&label); + __ Nop(); + for (XRegister reg : GetRegisters()) { + __ Beq(reg, reg, &label); + __ Bge(reg, reg, &label); + __ Ble(reg, reg, &label); + __ Bleu(reg, reg, &label); + __ Bgeu(reg, reg, &label); + } + std::string expected = + "1:\n" + "nop\n" + + RepeatInsn(5u * GetRegisters().size(), "j 1b\n", []() {}); + DriverStr(expected, "BcondUnconditional"); +} + +TEST_F(AssemblerRISCV64Test, JalRdForward3KiB) { + TestJalRdForward("JalRdForward3KiB", 3 * KB, "1", GetPrintJalRd()); +} + +TEST_F(AssemblerRISCV64Test, JalRdBackward3KiB) { + TestJalRdBackward("JalRdBackward3KiB", 3 * KB, "1", GetPrintJalRd()); +} + +TEST_F(AssemblerRISCV64Test, JalRdForward2MiB) { + TestJalRdForward("JalRdForward2MiB", 2 * MB, "1", GetPrintCallRd("2")); +} + +TEST_F(AssemblerRISCV64Test, JalRdBackward2MiB) { + TestJalRdBackward("JalRdBackward2MiB", 2 * MB, "1", GetPrintCallRd("2")); +} + +TEST_F(AssemblerRISCV64Test, JForward3KiB) { + TestBuncondForward("JForward3KiB", 3 * KB, "1", GetEmitJ(), GetPrintJ()); +} + +TEST_F(AssemblerRISCV64Test, JBackward3KiB) { + TestBuncondBackward("JBackward3KiB", 3 * KB, "1", GetEmitJ(), GetPrintJ()); +} + +TEST_F(AssemblerRISCV64Test, JForward2MiB) { + TestBuncondForward("JForward2MiB", 2 * MB, "1", GetEmitJ(), GetPrintTail("2")); +} + +TEST_F(AssemblerRISCV64Test, JBackward2MiB) { + TestBuncondBackward("JBackward2MiB", 2 * MB, "1", GetEmitJ(), GetPrintTail("2")); +} + +TEST_F(AssemblerRISCV64Test, JMaxOffset21Forward) { + TestBuncondForward("JMaxOffset21Forward", + MaxOffset21ForwardDistance() - /*J*/ 4u, + "1", + GetEmitJ(), + GetPrintJ()); +} + +TEST_F(AssemblerRISCV64Test, JMaxOffset21Backward) { + TestBuncondBackward("JMaxOffset21Backward", + MaxOffset21BackwardDistance(), + "1", + GetEmitJ(), + GetPrintJ()); +} + +TEST_F(AssemblerRISCV64Test, JOverMaxOffset21Forward) { + TestBuncondForward("JOverMaxOffset21Forward", + MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u, + "1", + GetEmitJ(), + GetPrintTail("2")); +} + +TEST_F(AssemblerRISCV64Test, JOverMaxOffset21Backward) { + TestBuncondBackward("JMaxOffset21Backward", + MaxOffset21BackwardDistance() + /*Exceed max*/ 4u, + "1", + GetEmitJ(), + GetPrintTail("2")); +} + +TEST_F(AssemblerRISCV64Test, CallForward3KiB) { + TestBuncondForward("CallForward3KiB", 3 * KB, "1", GetEmitJal(), GetPrintJal()); +} + +TEST_F(AssemblerRISCV64Test, CallBackward3KiB) { + TestBuncondBackward("CallBackward3KiB", 3 * KB, "1", GetEmitJal(), GetPrintJal()); +} + +TEST_F(AssemblerRISCV64Test, CallForward2MiB) { + TestBuncondForward("CallForward2MiB", 2 * MB, "1", GetEmitJal(), GetPrintCall("2")); +} + +TEST_F(AssemblerRISCV64Test, CallBackward2MiB) { + TestBuncondBackward("CallBackward2MiB", 2 * MB, "1", GetEmitJal(), GetPrintCall("2")); +} + +TEST_F(AssemblerRISCV64Test, CallMaxOffset21Forward) { + TestBuncondForward("CallMaxOffset21Forward", + MaxOffset21ForwardDistance() - /*J*/ 4u, + "1", + GetEmitJal(), + GetPrintJal()); +} + 
+TEST_F(AssemblerRISCV64Test, CallMaxOffset21Backward) { + TestBuncondBackward("CallMaxOffset21Backward", + MaxOffset21BackwardDistance(), + "1", + GetEmitJal(), + GetPrintJal()); +} + +TEST_F(AssemblerRISCV64Test, CallOverMaxOffset21Forward) { + TestBuncondForward("CallOverMaxOffset21Forward", + MaxOffset21ForwardDistance() - /*J*/ 4u + /*Exceed max*/ 4u, + "1", + GetEmitJal(), + GetPrintCall("2")); +} + +TEST_F(AssemblerRISCV64Test, CallOverMaxOffset21Backward) { + TestBuncondBackward("CallMaxOffset21Backward", + MaxOffset21BackwardDistance() + /*Exceed max*/ 4u, + "1", + GetEmitJal(), + GetPrintCall("2")); +} + +TEST_F(AssemblerRISCV64Test, Loadb) { + TestLoadStoreArbitraryOffset("Loadb", "lb", &Riscv64Assembler::Loadb, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Loadh) { + TestLoadStoreArbitraryOffset("Loadh", "lh", &Riscv64Assembler::Loadh, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Loadw) { + TestLoadStoreArbitraryOffset("Loadw", "lw", &Riscv64Assembler::Loadw, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Loadd) { + TestLoadStoreArbitraryOffset("Loadd", "ld", &Riscv64Assembler::Loadd, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Loadbu) { + TestLoadStoreArbitraryOffset("Loadbu", "lbu", &Riscv64Assembler::Loadbu, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Loadhu) { + TestLoadStoreArbitraryOffset("Loadhu", "lhu", &Riscv64Assembler::Loadhu, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Loadwu) { + TestLoadStoreArbitraryOffset("Loadwu", "lwu", &Riscv64Assembler::Loadwu, /*is_store=*/ false); +} + +TEST_F(AssemblerRISCV64Test, Storeb) { + TestLoadStoreArbitraryOffset("Storeb", "sb", &Riscv64Assembler::Storeb, /*is_store=*/ true); +} + +TEST_F(AssemblerRISCV64Test, Storeh) { + TestLoadStoreArbitraryOffset("Storeh", "sh", &Riscv64Assembler::Storeh, /*is_store=*/ true); +} + +TEST_F(AssemblerRISCV64Test, Storew) { + TestLoadStoreArbitraryOffset("Storew", "sw", &Riscv64Assembler::Storew, /*is_store=*/ true); +} + +TEST_F(AssemblerRISCV64Test, Stored) { + TestLoadStoreArbitraryOffset("Stored", "sd", &Riscv64Assembler::Stored, /*is_store=*/ true); +} + +TEST_F(AssemblerRISCV64Test, FLoadw) { + TestFPLoadStoreArbitraryOffset("FLoadw", "flw", &Riscv64Assembler::FLoadw); +} + +TEST_F(AssemblerRISCV64Test, FLoadd) { + TestFPLoadStoreArbitraryOffset("FLoadd", "fld", &Riscv64Assembler::FLoadd); +} + +TEST_F(AssemblerRISCV64Test, FStorew) { + TestFPLoadStoreArbitraryOffset("FStorew", "fsw", &Riscv64Assembler::FStorew); +} + +TEST_F(AssemblerRISCV64Test, FStored) { + TestFPLoadStoreArbitraryOffset("FStored", "fsd", &Riscv64Assembler::FStored); +} + +TEST_F(AssemblerRISCV64Test, Unimp) { + __ Unimp(); + DriverStr("unimp\n", "Unimp"); +} + +TEST_F(AssemblerRISCV64Test, LoadLabelAddress) { + std::string expected; + constexpr size_t kNumLoadsForward = 4 * KB; + constexpr size_t kNumLoadsBackward = 4 * KB; + Riscv64Label label; + auto emit_batch = [&](size_t num_loads, const std::string& target_label) { + for (size_t i = 0; i != num_loads; ++i) { + // Cycle through non-Zero registers. 
+ XRegister rd = enum_cast<XRegister>((i % (kNumberOfXRegisters - 1)) + 1); + DCHECK_NE(rd, Zero); + std::string rd_name = GetRegisterName(rd); + __ LoadLabelAddress(rd, &label); + expected += "1:\n"; + expected += ART_FORMAT("auipc {}, %pcrel_hi({})\n", rd_name, target_label); + expected += ART_FORMAT("addi {}, {}, %pcrel_lo(1b)\n", rd_name, rd_name); + } + }; + emit_batch(kNumLoadsForward, "2f"); + __ Bind(&label); + expected += "2:\n"; + emit_batch(kNumLoadsBackward, "2b"); + DriverStr(expected, "LoadLabelAddress"); +} + +TEST_F(AssemblerRISCV64Test, LoadLiteralWithPaddingForLong) { + TestLoadLiteral("LoadLiteralWithPaddingForLong", /*with_padding_for_long=*/ true); +} + +TEST_F(AssemblerRISCV64Test, LoadLiteralWithoutPaddingForLong) { + TestLoadLiteral("LoadLiteralWithoutPaddingForLong", /*with_padding_for_long=*/ false); +} + +TEST_F(AssemblerRISCV64Test, JumpTable) { + std::string expected; + expected += EmitNops(sizeof(uint32_t)); + Riscv64Label targets[4]; + uint32_t target_locations[4]; + JumpTable* jump_table = __ CreateJumpTable(ArenaVector<Riscv64Label*>( + {&targets[0], &targets[1], &targets[2], &targets[3]}, __ GetAllocator()->Adapter())); + for (size_t i : {0, 1, 2, 3}) { + target_locations[i] = __ CodeSize(); + __ Bind(&targets[i]); + expected += std::to_string(i) + ":\n"; + expected += EmitNops(sizeof(uint32_t)); + } + __ LoadLabelAddress(A0, jump_table->GetLabel()); + expected += "4:\n" + "auipc a0, %pcrel_hi(5f)\n" + "addi a0, a0, %pcrel_lo(4b)\n"; + expected += EmitNops(sizeof(uint32_t)); + uint32_t label5_location = __ CodeSize(); + auto target_offset = [&](size_t i) { + // Even with `-mno-relax`, clang assembler does not fully resolve `.4byte 0b - 5b` + // and emits a relocation, so we need to calculate target offsets ourselves. + return std::to_string(static_cast<int64_t>(target_locations[i] - label5_location)); + }; + expected += "5:\n" + ".4byte " + target_offset(0) + "\n" + ".4byte " + target_offset(1) + "\n" + ".4byte " + target_offset(2) + "\n" + ".4byte " + target_offset(3) + "\n"; + DriverStr(expected, "JumpTable"); +} + +TEST_F(AssemblerRISCV64Test, ScratchRegisters) { + ScratchRegisterScope srs(GetAssembler()); + ASSERT_EQ(2u, srs.AvailableXRegisters()); // Default: TMP(T6) and TMP2(T5). + ASSERT_EQ(1u, srs.AvailableFRegisters()); // Default: FTMP(FT11). + + XRegister tmp = srs.AllocateXRegister(); + EXPECT_EQ(TMP, tmp); + XRegister tmp2 = srs.AllocateXRegister(); + EXPECT_EQ(TMP2, tmp2); + ASSERT_EQ(0u, srs.AvailableXRegisters()); + + FRegister ftmp = srs.AllocateFRegister(); + EXPECT_EQ(FTMP, ftmp); + ASSERT_EQ(0u, srs.AvailableFRegisters()); + + // Test nesting. + srs.FreeXRegister(A0); + srs.FreeXRegister(A1); + srs.FreeFRegister(FA0); + srs.FreeFRegister(FA1); + ASSERT_EQ(2u, srs.AvailableXRegisters()); + ASSERT_EQ(2u, srs.AvailableFRegisters()); + { + ScratchRegisterScope srs2(GetAssembler()); + ASSERT_EQ(2u, srs2.AvailableXRegisters()); + ASSERT_EQ(2u, srs2.AvailableFRegisters()); + XRegister a1 = srs2.AllocateXRegister(); + EXPECT_EQ(A1, a1); + XRegister a0 = srs2.AllocateXRegister(); + EXPECT_EQ(A0, a0); + ASSERT_EQ(0u, srs2.AvailableXRegisters()); + FRegister fa1 = srs2.AllocateFRegister(); + EXPECT_EQ(FA1, fa1); + FRegister fa0 = srs2.AllocateFRegister(); + EXPECT_EQ(FA0, fa0); + ASSERT_EQ(0u, srs2.AvailableFRegisters()); + } + ASSERT_EQ(2u, srs.AvailableXRegisters()); + ASSERT_EQ(2u, srs.AvailableFRegisters()); + + srs.IncludeXRegister(A0); // No-op as the register was already available. 
+ ASSERT_EQ(2u, srs.AvailableXRegisters()); + srs.IncludeFRegister(FA0); // No-op as the register was already available. + ASSERT_EQ(2u, srs.AvailableFRegisters()); + srs.IncludeXRegister(S0); + ASSERT_EQ(3u, srs.AvailableXRegisters()); + srs.IncludeFRegister(FS0); + ASSERT_EQ(3u, srs.AvailableFRegisters()); + + srs.ExcludeXRegister(S1); // No-op as the register was not available. + ASSERT_EQ(3u, srs.AvailableXRegisters()); + srs.ExcludeFRegister(FS1); // No-op as the register was not available. + ASSERT_EQ(3u, srs.AvailableFRegisters()); + srs.ExcludeXRegister(A0); + ASSERT_EQ(2u, srs.AvailableXRegisters()); + srs.ExcludeFRegister(FA0); + ASSERT_EQ(2u, srs.AvailableFRegisters()); +} + +#undef __ + +} // namespace riscv64 +} // namespace art diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc b/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc new file mode 100644 index 0000000000..3aeee8a154 --- /dev/null +++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64.cc @@ -0,0 +1,633 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "jni_macro_assembler_riscv64.h" + +#include "base/bit_utils_iterator.h" +#include "dwarf/register.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "indirect_reference_table.h" +#include "lock_word.h" +#include "managed_register_riscv64.h" +#include "offsets.h" +#include "thread.h" + +namespace art HIDDEN { +namespace riscv64 { + +static constexpr size_t kSpillSize = 8; // Both GPRs and FPRs + +static std::pair<uint32_t, uint32_t> GetCoreAndFpSpillMasks( + ArrayRef<const ManagedRegister> callee_save_regs) { + uint32_t core_spill_mask = 0u; + uint32_t fp_spill_mask = 0u; + for (ManagedRegister r : callee_save_regs) { + Riscv64ManagedRegister reg = r.AsRiscv64(); + if (reg.IsXRegister()) { + core_spill_mask |= 1u << reg.AsXRegister(); + } else { + DCHECK(reg.IsFRegister()); + fp_spill_mask |= 1u << reg.AsFRegister(); + } + } + DCHECK_EQ(callee_save_regs.size(), + dchecked_integral_cast<size_t>(POPCOUNT(core_spill_mask) + POPCOUNT(fp_spill_mask))); + return {core_spill_mask, fp_spill_mask}; +} + +#define __ asm_. + +Riscv64JNIMacroAssembler::~Riscv64JNIMacroAssembler() { +} + +void Riscv64JNIMacroAssembler::FinalizeCode() { + __ FinalizeCode(); +} + +void Riscv64JNIMacroAssembler::BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs) { + // Increase frame to required size. + DCHECK_ALIGNED(frame_size, kStackAlignment); + // Must at least have space for Method* if we're going to spill it. + DCHECK_GE(frame_size, + (callee_save_regs.size() + (method_reg.IsRegister() ? 1u : 0u)) * kSpillSize); + IncreaseFrameSize(frame_size); + + // Save callee-saves. 
+ auto [core_spill_mask, fp_spill_mask] = GetCoreAndFpSpillMasks(callee_save_regs); + size_t offset = frame_size; + if ((core_spill_mask & (1u << RA)) != 0u) { + offset -= kSpillSize; + __ Stored(RA, SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Core(RA), offset); + } + for (uint32_t reg : HighToLowBits(core_spill_mask & ~(1u << RA))) { + offset -= kSpillSize; + __ Stored(enum_cast<XRegister>(reg), SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Core(enum_cast<XRegister>(reg)), offset); + } + for (uint32_t reg : HighToLowBits(fp_spill_mask)) { + offset -= kSpillSize; + __ FStored(enum_cast<FRegister>(reg), SP, offset); + __ cfi().RelOffset(dwarf::Reg::Riscv64Fp(enum_cast<FRegister>(reg)), offset); + } + + if (method_reg.IsRegister()) { + // Write ArtMethod*. + DCHECK_EQ(A0, method_reg.AsRiscv64().AsXRegister()); + __ Stored(A0, SP, 0); + } +} + +void Riscv64JNIMacroAssembler::RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> callee_save_regs, + [[maybe_unused]] bool may_suspend) { + cfi().RememberState(); + + // Restore callee-saves. + auto [core_spill_mask, fp_spill_mask] = GetCoreAndFpSpillMasks(callee_save_regs); + size_t offset = frame_size - callee_save_regs.size() * kSpillSize; + for (uint32_t reg : LowToHighBits(fp_spill_mask)) { + __ FLoadd(enum_cast<FRegister>(reg), SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Fp(enum_cast<FRegister>(reg))); + offset += kSpillSize; + } + for (uint32_t reg : LowToHighBits(core_spill_mask & ~(1u << RA))) { + __ Loadd(enum_cast<XRegister>(reg), SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Core(enum_cast<XRegister>(reg))); + offset += kSpillSize; + } + if ((core_spill_mask & (1u << RA)) != 0u) { + __ Loadd(RA, SP, offset); + __ cfi().Restore(dwarf::Reg::Riscv64Core(RA)); + offset += kSpillSize; + } + DCHECK_EQ(offset, frame_size); + + // Decrease the frame size. + DecreaseFrameSize(frame_size); + + // Return to RA. + __ Ret(); + + // The CFI should be restored for any code that follows the exit block. 
+ __ cfi().RestoreState(); + __ cfi().DefCFAOffset(frame_size); +} + +void Riscv64JNIMacroAssembler::IncreaseFrameSize(size_t adjust) { + if (adjust != 0u) { + CHECK_ALIGNED(adjust, kStackAlignment); + int64_t adjustment = dchecked_integral_cast<int64_t>(adjust); + __ AddConst64(SP, SP, -adjustment); + __ cfi().AdjustCFAOffset(adjustment); + } +} + +void Riscv64JNIMacroAssembler::DecreaseFrameSize(size_t adjust) { + if (adjust != 0u) { + CHECK_ALIGNED(adjust, kStackAlignment); + int64_t adjustment = dchecked_integral_cast<int64_t>(adjust); + __ AddConst64(SP, SP, adjustment); + __ cfi().AdjustCFAOffset(-adjustment); + } +} + +ManagedRegister Riscv64JNIMacroAssembler::CoreRegisterWithSize(ManagedRegister src, size_t size) { + DCHECK(src.AsRiscv64().IsXRegister()); + DCHECK(size == 4u || size == 8u) << size; + return src; +} + +void Riscv64JNIMacroAssembler::Store(FrameOffset offs, ManagedRegister m_src, size_t size) { + Store(Riscv64ManagedRegister::FromXRegister(SP), MemberOffset(offs.Int32Value()), m_src, size); +} + +void Riscv64JNIMacroAssembler::Store(ManagedRegister m_base, + MemberOffset offs, + ManagedRegister m_src, + size_t size) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + Riscv64ManagedRegister src = m_src.AsRiscv64(); + if (src.IsXRegister()) { + if (size == 4u) { + __ Storew(src.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + } else { + CHECK_EQ(8u, size); + __ Stored(src.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + } + } else { + CHECK(src.IsFRegister()) << src; + if (size == 4u) { + __ FStorew(src.AsFRegister(), base.AsXRegister(), offs.Int32Value()); + } else { + CHECK_EQ(8u, size); + __ FStored(src.AsFRegister(), base.AsXRegister(), offs.Int32Value()); + } + } +} + +void Riscv64JNIMacroAssembler::StoreRawPtr(FrameOffset offs, ManagedRegister m_src) { + Riscv64ManagedRegister sp = Riscv64ManagedRegister::FromXRegister(SP); + Store(sp, MemberOffset(offs.Int32Value()), m_src, static_cast<size_t>(kRiscv64PointerSize)); +} + +void Riscv64JNIMacroAssembler::StoreStackPointerToThread(ThreadOffset64 offs, bool tag_sp) { + XRegister src = SP; + ScratchRegisterScope srs(&asm_); + if (tag_sp) { + XRegister tmp = srs.AllocateXRegister(); + __ Ori(tmp, SP, 0x2); + src = tmp; + } + __ Stored(src, TR, offs.Int32Value()); +} + +void Riscv64JNIMacroAssembler::Load(ManagedRegister m_dest, FrameOffset offs, size_t size) { + Riscv64ManagedRegister sp = Riscv64ManagedRegister::FromXRegister(SP); + Load(m_dest, sp, MemberOffset(offs.Int32Value()), size); +} + +void Riscv64JNIMacroAssembler::Load(ManagedRegister m_dest, + ManagedRegister m_base, + MemberOffset offs, + size_t size) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + Riscv64ManagedRegister dest = m_dest.AsRiscv64(); + if (dest.IsXRegister()) { + if (size == 4u) { + // The riscv64 native calling convention specifies that integers narrower than XLEN (64) + // bits are "widened according to the sign of their type up to 32 bits, then sign-extended + // to XLEN bits." The managed ABI already passes integral values this way in registers + // and correctly widened to 32 bits on the stack. The `Load()` must sign-extend narrower + // types here to pass integral values correctly to the native call. + // For `float` args, the upper 32 bits are undefined, so this is fine for them as well. 
+ __ Loadw(dest.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + } else { + CHECK_EQ(8u, size); + __ Loadd(dest.AsXRegister(), base.AsXRegister(), offs.Int32Value()); + } + } else { + CHECK(dest.IsFRegister()) << dest; + if (size == 4u) { + __ FLoadw(dest.AsFRegister(), base.AsXRegister(), offs.Int32Value()); + } else { + CHECK_EQ(8u, size); + __ FLoadd(dest.AsFRegister(), base.AsXRegister(), offs.Int32Value()); + } + } +} + +void Riscv64JNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dest, ThreadOffset64 offs) { + Riscv64ManagedRegister tr = Riscv64ManagedRegister::FromXRegister(TR); + Load(m_dest, tr, MemberOffset(offs.Int32Value()), static_cast<size_t>(kRiscv64PointerSize)); +} + +void Riscv64JNIMacroAssembler::LoadGcRootWithoutReadBarrier(ManagedRegister m_dest, + ManagedRegister m_base, + MemberOffset offs) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + Riscv64ManagedRegister dest = m_dest.AsRiscv64(); + __ Loadwu(dest.AsXRegister(), base.AsXRegister(), offs.Int32Value()); +} + +void Riscv64JNIMacroAssembler::MoveArguments(ArrayRef<ArgumentLocation> dests, + ArrayRef<ArgumentLocation> srcs, + ArrayRef<FrameOffset> refs) { + size_t arg_count = dests.size(); + DCHECK_EQ(arg_count, srcs.size()); + DCHECK_EQ(arg_count, refs.size()); + + auto get_mask = [](ManagedRegister reg) -> uint64_t { + Riscv64ManagedRegister riscv64_reg = reg.AsRiscv64(); + if (riscv64_reg.IsXRegister()) { + size_t core_reg_number = static_cast<size_t>(riscv64_reg.AsXRegister()); + DCHECK_LT(core_reg_number, 32u); + return UINT64_C(1) << core_reg_number; + } else { + DCHECK(riscv64_reg.IsFRegister()); + size_t fp_reg_number = static_cast<size_t>(riscv64_reg.AsFRegister()); + DCHECK_LT(fp_reg_number, 32u); + return (UINT64_C(1) << 32u) << fp_reg_number; + } + }; + + // Collect registers to move while storing/copying args to stack slots. + // Convert processed references to `jobject`. + uint64_t src_regs = 0u; + uint64_t dest_regs = 0u; + for (size_t i = 0; i != arg_count; ++i) { + const ArgumentLocation& src = srcs[i]; + const ArgumentLocation& dest = dests[i]; + const FrameOffset ref = refs[i]; + if (ref != kInvalidReferenceOffset) { + DCHECK_EQ(src.GetSize(), kObjectReferenceSize); + DCHECK_EQ(dest.GetSize(), static_cast<size_t>(kRiscv64PointerSize)); + } else { + DCHECK(src.GetSize() == 4u || src.GetSize() == 8u) << src.GetSize(); + DCHECK(dest.GetSize() == 4u || dest.GetSize() == 8u) << dest.GetSize(); + DCHECK_LE(src.GetSize(), dest.GetSize()); + } + if (dest.IsRegister()) { + if (src.IsRegister() && src.GetRegister().Equals(dest.GetRegister())) { + // No move is necessary but we may need to convert a reference to a `jobject`. + if (ref != kInvalidReferenceOffset) { + CreateJObject(dest.GetRegister(), ref, src.GetRegister(), /*null_allowed=*/ i != 0u); + } + } else { + if (src.IsRegister()) { + src_regs |= get_mask(src.GetRegister()); + } + dest_regs |= get_mask(dest.GetRegister()); + } + } else { + ScratchRegisterScope srs(&asm_); + Riscv64ManagedRegister reg = src.IsRegister() + ? src.GetRegister().AsRiscv64() + : Riscv64ManagedRegister::FromXRegister(srs.AllocateXRegister()); + if (!src.IsRegister()) { + if (ref != kInvalidReferenceOffset) { + // We're loading the reference only for comparison with null, so it does not matter + // if we sign- or zero-extend but let's correctly zero-extend the reference anyway. 
+ __ Loadwu(reg.AsRiscv64().AsXRegister(), SP, src.GetFrameOffset().SizeValue()); + } else { + Load(reg, src.GetFrameOffset(), src.GetSize()); + } + } + if (ref != kInvalidReferenceOffset) { + DCHECK_NE(i, 0u); + CreateJObject(reg, ref, reg, /*null_allowed=*/ true); + } + Store(dest.GetFrameOffset(), reg, dest.GetSize()); + } + } + + // Fill destination registers. + // There should be no cycles, so this simple algorithm should make progress. + while (dest_regs != 0u) { + uint64_t old_dest_regs = dest_regs; + for (size_t i = 0; i != arg_count; ++i) { + const ArgumentLocation& src = srcs[i]; + const ArgumentLocation& dest = dests[i]; + const FrameOffset ref = refs[i]; + if (!dest.IsRegister()) { + continue; // Stored in first loop above. + } + uint64_t dest_reg_mask = get_mask(dest.GetRegister()); + if ((dest_reg_mask & dest_regs) == 0u) { + continue; // Equals source, or already filled in one of previous iterations. + } + if ((dest_reg_mask & src_regs) != 0u) { + continue; // Cannot clobber this register yet. + } + if (src.IsRegister()) { + if (ref != kInvalidReferenceOffset) { + DCHECK_NE(i, 0u); // The `this` arg remains in the same register (handled above). + CreateJObject(dest.GetRegister(), ref, src.GetRegister(), /*null_allowed=*/ true); + } else { + Move(dest.GetRegister(), src.GetRegister(), dest.GetSize()); + } + src_regs &= ~get_mask(src.GetRegister()); // Allow clobbering source register. + } else { + Load(dest.GetRegister(), src.GetFrameOffset(), src.GetSize()); + // No `jobject` conversion needed. There are enough arg registers in managed ABI + // to hold all references that yield a register arg `jobject` in native ABI. + DCHECK_EQ(ref, kInvalidReferenceOffset); + } + dest_regs &= ~get_mask(dest.GetRegister()); // Destination register was filled. + } + CHECK_NE(old_dest_regs, dest_regs); + DCHECK_EQ(0u, dest_regs & ~old_dest_regs); + } +} + +void Riscv64JNIMacroAssembler::Move(ManagedRegister m_dest, ManagedRegister m_src, size_t size) { + // Note: This function is used only for moving between GPRs. + // FP argument registers hold the same arguments in managed and native ABIs. + DCHECK(size == 4u || size == 8u) << size; + Riscv64ManagedRegister dest = m_dest.AsRiscv64(); + Riscv64ManagedRegister src = m_src.AsRiscv64(); + DCHECK(dest.IsXRegister()); + DCHECK(src.IsXRegister()); + if (!dest.Equals(src)) { + __ Mv(dest.AsXRegister(), src.AsXRegister()); + } +} + +void Riscv64JNIMacroAssembler::Move(ManagedRegister m_dest, size_t value) { + DCHECK(m_dest.AsRiscv64().IsXRegister()); + __ LoadConst64(m_dest.AsRiscv64().AsXRegister(), dchecked_integral_cast<int64_t>(value)); +} + +void Riscv64JNIMacroAssembler::SignExtend([[maybe_unused]] ManagedRegister mreg, + [[maybe_unused]] size_t size) { + LOG(FATAL) << "The result is already sign-extended in the native ABI."; + UNREACHABLE(); +} + +void Riscv64JNIMacroAssembler::ZeroExtend([[maybe_unused]] ManagedRegister mreg, + [[maybe_unused]] size_t size) { + LOG(FATAL) << "The result is already zero-extended in the native ABI."; + UNREACHABLE(); +} + +void Riscv64JNIMacroAssembler::GetCurrentThread(ManagedRegister dest) { + DCHECK(dest.AsRiscv64().IsXRegister()); + __ Mv(dest.AsRiscv64().AsXRegister(), TR); +} + +void Riscv64JNIMacroAssembler::GetCurrentThread(FrameOffset offset) { + __ Stored(TR, SP, offset.Int32Value()); +} + +void Riscv64JNIMacroAssembler::DecodeJNITransitionOrLocalJObject(ManagedRegister m_reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) { + // This implements the fast-path of `Thread::DecodeJObject()`. 
+ constexpr int64_t kGlobalOrWeakGlobalMask = IndirectReferenceTable::GetGlobalOrWeakGlobalMask(); + DCHECK(IsInt<12>(kGlobalOrWeakGlobalMask)); + constexpr int64_t kIndirectRefKindMask = IndirectReferenceTable::GetIndirectRefKindMask(); + DCHECK(IsInt<12>(kIndirectRefKindMask)); + XRegister reg = m_reg.AsRiscv64().AsXRegister(); + __ Beqz(reg, Riscv64JNIMacroLabel::Cast(resume)->AsRiscv64()); // Skip test and load for null. + __ Andi(TMP, reg, kGlobalOrWeakGlobalMask); + __ Bnez(TMP, Riscv64JNIMacroLabel::Cast(slow_path)->AsRiscv64()); + __ Andi(reg, reg, ~kIndirectRefKindMask); + __ Loadw(reg, reg, 0); +} + +void Riscv64JNIMacroAssembler::VerifyObject([[maybe_unused]] ManagedRegister m_src, + [[maybe_unused]] bool could_be_null) { + // TODO: not validating references. +} + +void Riscv64JNIMacroAssembler::VerifyObject([[maybe_unused]] FrameOffset src, + [[maybe_unused]] bool could_be_null) { + // TODO: not validating references. +} + +void Riscv64JNIMacroAssembler::Jump(ManagedRegister m_base, Offset offs) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + CHECK(base.IsXRegister()) << base; + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadd(tmp, base.AsXRegister(), offs.Int32Value()); + __ Jr(tmp); +} + +void Riscv64JNIMacroAssembler::Call(ManagedRegister m_base, Offset offs) { + Riscv64ManagedRegister base = m_base.AsRiscv64(); + CHECK(base.IsXRegister()) << base; + __ Loadd(RA, base.AsXRegister(), offs.Int32Value()); + __ Jalr(RA); +} + + +void Riscv64JNIMacroAssembler::CallFromThread(ThreadOffset64 offset) { + Call(Riscv64ManagedRegister::FromXRegister(TR), offset); +} + +void Riscv64JNIMacroAssembler::TryToTransitionFromRunnableToNative( + JNIMacroLabel* label, + ArrayRef<const ManagedRegister> scratch_regs) { + constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); + constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); + constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kRiscv64PointerSize>(); + constexpr ThreadOffset64 thread_held_mutex_mutator_lock_offset = + Thread::HeldMutexOffset<kRiscv64PointerSize>(kMutatorLock); + + DCHECK_GE(scratch_regs.size(), 2u); + XRegister scratch = scratch_regs[0].AsRiscv64().AsXRegister(); + XRegister scratch2 = scratch_regs[1].AsRiscv64().AsXRegister(); + + // CAS release, old_value = kRunnableStateValue, new_value = kNativeStateValue, no flags. + Riscv64Label retry; + __ Bind(&retry); + static_assert(thread_flags_offset.Int32Value() == 0); // LR/SC require exact address. + __ LrW(scratch, TR, AqRl::kNone); + __ Li(scratch2, kNativeStateValue); + // If any flags are set, go to the slow path. + static_assert(kRunnableStateValue == 0u); + __ Bnez(scratch, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + __ ScW(scratch, scratch2, TR, AqRl::kRelease); + __ Bnez(scratch, &retry); + + // Clear `self->tlsPtr_.held_mutexes[kMutatorLock]`. 
+ __ Stored(Zero, TR, thread_held_mutex_mutator_lock_offset.Int32Value()); +} + +void Riscv64JNIMacroAssembler::TryToTransitionFromNativeToRunnable( + JNIMacroLabel* label, + ArrayRef<const ManagedRegister> scratch_regs, + ManagedRegister return_reg) { + constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); + constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); + constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kRiscv64PointerSize>(); + constexpr ThreadOffset64 thread_held_mutex_mutator_lock_offset = + Thread::HeldMutexOffset<kRiscv64PointerSize>(kMutatorLock); + constexpr ThreadOffset64 thread_mutator_lock_offset = + Thread::MutatorLockOffset<kRiscv64PointerSize>(); + + DCHECK_GE(scratch_regs.size(), 2u); + DCHECK(!scratch_regs[0].AsRiscv64().Overlaps(return_reg.AsRiscv64())); + XRegister scratch = scratch_regs[0].AsRiscv64().AsXRegister(); + DCHECK(!scratch_regs[1].AsRiscv64().Overlaps(return_reg.AsRiscv64())); + XRegister scratch2 = scratch_regs[1].AsRiscv64().AsXRegister(); + + // CAS acquire, old_value = kNativeStateValue, new_value = kRunnableStateValue, no flags. + Riscv64Label retry; + __ Bind(&retry); + static_assert(thread_flags_offset.Int32Value() == 0); // LR/SC require exact address. + __ LrW(scratch, TR, AqRl::kAcquire); + __ Li(scratch2, kNativeStateValue); + // If any flags are set, or the state is not Native, go to the slow path. + // (While the thread can theoretically transition between different Suspended states, + // it would be very unexpected to see a state other than Native at this point.) + __ Bne(scratch, scratch2, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + static_assert(kRunnableStateValue == 0u); + __ ScW(scratch, Zero, TR, AqRl::kNone); + __ Bnez(scratch, &retry); + + // Set `self->tlsPtr_.held_mutexes[kMutatorLock]` to the mutator lock. + __ Loadd(scratch, TR, thread_mutator_lock_offset.Int32Value()); + __ Stored(scratch, TR, thread_held_mutex_mutator_lock_offset.Int32Value()); +} + +void Riscv64JNIMacroAssembler::SuspendCheck(JNIMacroLabel* label) { + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, TR, Thread::ThreadFlagsOffset<kRiscv64PointerSize>().Int32Value()); + DCHECK(IsInt<12>(dchecked_integral_cast<int32_t>(Thread::SuspendOrCheckpointRequestFlags()))); + __ Andi(tmp, tmp, dchecked_integral_cast<int32_t>(Thread::SuspendOrCheckpointRequestFlags())); + __ Bnez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); +} + +void Riscv64JNIMacroAssembler::ExceptionPoll(JNIMacroLabel* label) { + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadd(tmp, TR, Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value()); + __ Bnez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); +} + +void Riscv64JNIMacroAssembler::DeliverPendingException() { + // Pass exception object as argument. + // Don't care about preserving A0 as this won't return. + // Note: The scratch register from `ExceptionPoll()` may have been clobbered. + __ Loadd(A0, TR, Thread::ExceptionOffset<kRiscv64PointerSize>().Int32Value()); + __ Loadd(RA, TR, QUICK_ENTRYPOINT_OFFSET(kRiscv64PointerSize, pDeliverException).Int32Value()); + __ Jalr(RA); + // Call should never return. 
+ __ Unimp(); +} + +std::unique_ptr<JNIMacroLabel> Riscv64JNIMacroAssembler::CreateLabel() { + return std::unique_ptr<JNIMacroLabel>(new Riscv64JNIMacroLabel()); +} + +void Riscv64JNIMacroAssembler::Jump(JNIMacroLabel* label) { + CHECK(label != nullptr); + __ J(down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); +} + +void Riscv64JNIMacroAssembler::TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) { + CHECK(label != nullptr); + + DCHECK_EQ(Thread::IsGcMarkingSize(), 4u); + + ScratchRegisterScope srs(&asm_); + XRegister test_reg = srs.AllocateXRegister(); + int32_t is_gc_marking_offset = Thread::IsGcMarkingOffset<kRiscv64PointerSize>().Int32Value(); + __ Loadw(test_reg, TR, is_gc_marking_offset); + switch (cond) { + case JNIMacroUnaryCondition::kZero: + __ Beqz(test_reg, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); + break; + case JNIMacroUnaryCondition::kNotZero: + __ Bnez(test_reg, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); + break; + default: + LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond); + UNREACHABLE(); + } +} + +void Riscv64JNIMacroAssembler::TestMarkBit(ManagedRegister m_ref, + JNIMacroLabel* label, + JNIMacroUnaryCondition cond) { + XRegister ref = m_ref.AsRiscv64().AsXRegister(); + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ Loadw(tmp, ref, mirror::Object::MonitorOffset().Int32Value()); + // Move the bit we want to check to the sign bit, so that we can use BGEZ/BLTZ + // to check it. Extracting the bit for BEQZ/BNEZ would require one more instruction. + static_assert(LockWord::kMarkBitStateSize == 1u); + __ Slliw(tmp, tmp, 31 - LockWord::kMarkBitStateShift); + switch (cond) { + case JNIMacroUnaryCondition::kZero: + __ Bgez(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + break; + case JNIMacroUnaryCondition::kNotZero: + __ Bltz(tmp, Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); + break; + default: + LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(cond); + UNREACHABLE(); + } +} + +void Riscv64JNIMacroAssembler::TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) { + int32_t small_offset = dchecked_integral_cast<int32_t>(address & 0xfff) - + dchecked_integral_cast<int32_t>((address & 0x800) << 1); + int64_t remainder = static_cast<int64_t>(address) - small_offset; + ScratchRegisterScope srs(&asm_); + XRegister tmp = srs.AllocateXRegister(); + __ LoadConst64(tmp, remainder); + __ Lb(tmp, tmp, small_offset); + __ Bnez(tmp, down_cast<Riscv64Label*>(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64())); +} + +void Riscv64JNIMacroAssembler::Bind(JNIMacroLabel* label) { + CHECK(label != nullptr); + __ Bind(Riscv64JNIMacroLabel::Cast(label)->AsRiscv64()); +} + +void Riscv64JNIMacroAssembler::CreateJObject(ManagedRegister m_dest, + FrameOffset spilled_reference_offset, + ManagedRegister m_ref, + bool null_allowed) { + Riscv64ManagedRegister dest = m_dest.AsRiscv64(); + Riscv64ManagedRegister ref = m_ref.AsRiscv64(); + DCHECK(dest.IsXRegister()); + DCHECK(ref.IsXRegister()); + + Riscv64Label null_label; + if (null_allowed) { + if (!dest.Equals(ref)) { + __ Li(dest.AsXRegister(), 0); + } + __ Beqz(ref.AsXRegister(), &null_label); + } + __ AddConst64(dest.AsXRegister(), SP, spilled_reference_offset.Int32Value()); + if (null_allowed) { + __ Bind(&null_label); + } +} + +#undef __ + +} // namespace riscv64 +} // namespace art diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64.h 
b/compiler/utils/riscv64/jni_macro_assembler_riscv64.h new file mode 100644 index 0000000000..79618e2c8e --- /dev/null +++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64.h @@ -0,0 +1,166 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_UTILS_RISCV64_JNI_MACRO_ASSEMBLER_RISCV64_H_ +#define ART_COMPILER_UTILS_RISCV64_JNI_MACRO_ASSEMBLER_RISCV64_H_ + +#include <stdint.h> +#include <memory> +#include <vector> + +#include <android-base/logging.h> + +#include "assembler_riscv64.h" +#include "base/arena_containers.h" +#include "base/enums.h" +#include "base/macros.h" +#include "offsets.h" +#include "utils/assembler.h" +#include "utils/jni_macro_assembler.h" + +namespace art HIDDEN { +namespace riscv64 { + +class Riscv64JNIMacroAssembler : public JNIMacroAssemblerFwd<Riscv64Assembler, PointerSize::k64> { + public: + explicit Riscv64JNIMacroAssembler(ArenaAllocator* allocator) + : JNIMacroAssemblerFwd<Riscv64Assembler, PointerSize::k64>(allocator) {} + ~Riscv64JNIMacroAssembler(); + + // Finalize the code. + void FinalizeCode() override; + + // Emit code that will create an activation on the stack. + void BuildFrame(size_t frame_size, + ManagedRegister method_reg, + ArrayRef<const ManagedRegister> callee_save_regs) override; + + // Emit code that will remove an activation from the stack. + void RemoveFrame(size_t frame_size, + ArrayRef<const ManagedRegister> callee_save_regs, + bool may_suspend) override; + + void IncreaseFrameSize(size_t adjust) override; + void DecreaseFrameSize(size_t adjust) override; + + ManagedRegister CoreRegisterWithSize(ManagedRegister src, size_t size) override; + + // Store routines. + void Store(FrameOffset offs, ManagedRegister src, size_t size) override; + void Store(ManagedRegister base, MemberOffset offs, ManagedRegister src, size_t size) override; + void StoreRawPtr(FrameOffset offs, ManagedRegister src) override; + void StoreStackPointerToThread(ThreadOffset64 offs, bool tag_sp) override; + + // Load routines. + void Load(ManagedRegister dest, FrameOffset offs, size_t size) override; + void Load(ManagedRegister dest, ManagedRegister base, MemberOffset offs, size_t size) override; + void LoadRawPtrFromThread(ManagedRegister dest, ThreadOffset64 offs) override; + void LoadGcRootWithoutReadBarrier(ManagedRegister dest, + ManagedRegister base, + MemberOffset offs) override; + + // Copying routines. + void MoveArguments(ArrayRef<ArgumentLocation> dests, + ArrayRef<ArgumentLocation> srcs, + ArrayRef<FrameOffset> refs) override; + void Move(ManagedRegister dest, ManagedRegister src, size_t size) override; + void Move(ManagedRegister dest, size_t value) override; + + // Sign extension. + void SignExtend(ManagedRegister mreg, size_t size) override; + + // Zero extension. + void ZeroExtend(ManagedRegister mreg, size_t size) override; + + // Exploit fast access in managed code to Thread::Current(). 
+ void GetCurrentThread(ManagedRegister dest) override; + void GetCurrentThread(FrameOffset offset) override; + + // Decode JNI transition or local `jobject`. For (weak) global `jobject`, jump to slow path. + void DecodeJNITransitionOrLocalJObject(ManagedRegister reg, + JNIMacroLabel* slow_path, + JNIMacroLabel* resume) override; + + // Heap::VerifyObject on src. In some cases (such as a reference to this) we + // know that src may not be null. + void VerifyObject(ManagedRegister src, bool could_be_null) override; + void VerifyObject(FrameOffset src, bool could_be_null) override; + + // Jump to address held at [base+offset] (used for tail calls). + void Jump(ManagedRegister base, Offset offset) override; + + // Call to address held at [base+offset]. + void Call(ManagedRegister base, Offset offset) override; + void CallFromThread(ThreadOffset64 offset) override; + + // Generate fast-path for transition to Native. Go to `label` if any thread flag is set. + // The implementation can use `scratch_regs` which should be callee save core registers + // (already saved before this call) and must preserve all argument registers. + void TryToTransitionFromRunnableToNative(JNIMacroLabel* label, + ArrayRef<const ManagedRegister> scratch_regs) override; + + // Generate fast-path for transition to Runnable. Go to `label` if any thread flag is set. + // The implementation can use `scratch_regs` which should be core argument registers + // not used as return registers and it must preserve the `return_reg` if any. + void TryToTransitionFromNativeToRunnable(JNIMacroLabel* label, + ArrayRef<const ManagedRegister> scratch_regs, + ManagedRegister return_reg) override; + + // Generate suspend check and branch to `label` if there is a pending suspend request. + void SuspendCheck(JNIMacroLabel* label) override; + + // Generate code to check if Thread::Current()->exception_ is non-null + // and branch to the `label` if it is. + void ExceptionPoll(JNIMacroLabel* label) override; + // Deliver pending exception. + void DeliverPendingException() override; + + // Create a new label that can be used with Jump/Bind calls. + std::unique_ptr<JNIMacroLabel> CreateLabel() override; + // Emit an unconditional jump to the label. + void Jump(JNIMacroLabel* label) override; + // Emit a conditional jump to the label by applying a unary condition test to the GC marking flag. + void TestGcMarking(JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; + // Emit a conditional jump to the label by applying a unary condition test to object's mark bit. + void TestMarkBit(ManagedRegister ref, JNIMacroLabel* label, JNIMacroUnaryCondition cond) override; + // Emit a conditional jump to label if the loaded value from specified locations is not zero. + void TestByteAndJumpIfNotZero(uintptr_t address, JNIMacroLabel* label) override; + // Code at this offset will serve as the target for the Jump call. 
+ void Bind(JNIMacroLabel* label) override; + + private: + void CreateJObject(ManagedRegister m_dest, + FrameOffset spilled_reference_offset, + ManagedRegister m_ref, + bool null_allowed); + + ART_FRIEND_TEST(JniMacroAssemblerRiscv64Test, CreateJObject); +}; + +class Riscv64JNIMacroLabel final + : public JNIMacroLabelCommon<Riscv64JNIMacroLabel, + Riscv64Label, + InstructionSet::kRiscv64> { + public: + Riscv64Label* AsRiscv64() { + return AsPlatformLabel(); + } +}; + +} // namespace riscv64 +} // namespace art + +#endif // ART_COMPILER_UTILS_RISCV64_JNI_MACRO_ASSEMBLER_RISCV64_H_ diff --git a/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc b/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc new file mode 100644 index 0000000000..004ba9bb7f --- /dev/null +++ b/compiler/utils/riscv64/jni_macro_assembler_riscv64_test.cc @@ -0,0 +1,959 @@ +/* + * Copyright (C) 2023 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <dirent.h> +#include <errno.h> +#include <string.h> +#include <sys/types.h> + +#include <fstream> +#include <map> +#include <regex> + +#include "gtest/gtest.h" + +#include "indirect_reference_table.h" +#include "lock_word.h" +#include "jni/quick/calling_convention.h" +#include "utils/riscv64/jni_macro_assembler_riscv64.h" +#include "utils/assembler_test_base.h" + +#include "base/macros.h" +#include "base/malloc_arena_pool.h" + +namespace art HIDDEN { +namespace riscv64 { + +#define __ assembler_. 
+ +class JniMacroAssemblerRiscv64Test : public AssemblerTestBase { + public: + JniMacroAssemblerRiscv64Test() : pool_(), allocator_(&pool_), assembler_(&allocator_) { } + + protected: + InstructionSet GetIsa() override { return InstructionSet::kRiscv64; } + + void DriverStr(const std::string& assembly_text, const std::string& test_name) { + assembler_.FinalizeCode(); + size_t cs = assembler_.CodeSize(); + std::vector<uint8_t> data(cs); + MemoryRegion code(&data[0], data.size()); + assembler_.CopyInstructions(code); + Driver(data, assembly_text, test_name); + } + + static Riscv64ManagedRegister AsManaged(XRegister reg) { + return Riscv64ManagedRegister::FromXRegister(reg); + } + + static Riscv64ManagedRegister AsManaged(FRegister reg) { + return Riscv64ManagedRegister::FromFRegister(reg); + } + + std::string EmitRet() { + __ RemoveFrame(/*frame_size=*/ 0u, + /*callee_save_regs=*/ ArrayRef<const ManagedRegister>(), + /*may_suspend=*/ false); + return "ret\n"; + } + + static const size_t kWordSize = 4u; + static const size_t kDoubleWordSize = 8u; + + MallocArenaPool pool_; + ArenaAllocator allocator_; + Riscv64JNIMacroAssembler assembler_; +}; + +TEST_F(JniMacroAssemblerRiscv64Test, StackFrame) { + std::string expected; + + std::unique_ptr<JniCallingConvention> jni_conv = JniCallingConvention::Create( + &allocator_, + /*is_static=*/ false, + /*is_synchronized=*/ false, + /*is_fast_native=*/ false, + /*is_critical_native=*/ false, + /*shorty=*/ "V", + InstructionSet::kRiscv64); + size_t frame_size = jni_conv->FrameSize(); + ManagedRegister method_reg = AsManaged(A0); + ArrayRef<const ManagedRegister> callee_save_regs = jni_conv->CalleeSaveRegisters(); + + __ BuildFrame(frame_size, method_reg, callee_save_regs); + expected += "addi sp, sp, -208\n" + "sd ra, 200(sp)\n" + "sd s11, 192(sp)\n" + "sd s10, 184(sp)\n" + "sd s9, 176(sp)\n" + "sd s8, 168(sp)\n" + "sd s7, 160(sp)\n" + "sd s6, 152(sp)\n" + "sd s5, 144(sp)\n" + "sd s4, 136(sp)\n" + "sd s3, 128(sp)\n" + "sd s2, 120(sp)\n" + "sd s0, 112(sp)\n" + "fsd fs11, 104(sp)\n" + "fsd fs10, 96(sp)\n" + "fsd fs9, 88(sp)\n" + "fsd fs8, 80(sp)\n" + "fsd fs7, 72(sp)\n" + "fsd fs6, 64(sp)\n" + "fsd fs5, 56(sp)\n" + "fsd fs4, 48(sp)\n" + "fsd fs3, 40(sp)\n" + "fsd fs2, 32(sp)\n" + "fsd fs1, 24(sp)\n" + "fsd fs0, 16(sp)\n" + "sd a0, 0(sp)\n"; + + __ RemoveFrame(frame_size, callee_save_regs, /*may_suspend=*/ false); + expected += "fld fs0, 16(sp)\n" + "fld fs1, 24(sp)\n" + "fld fs2, 32(sp)\n" + "fld fs3, 40(sp)\n" + "fld fs4, 48(sp)\n" + "fld fs5, 56(sp)\n" + "fld fs6, 64(sp)\n" + "fld fs7, 72(sp)\n" + "fld fs8, 80(sp)\n" + "fld fs9, 88(sp)\n" + "fld fs10, 96(sp)\n" + "fld fs11, 104(sp)\n" + "ld s0, 112(sp)\n" + "ld s2, 120(sp)\n" + "ld s3, 128(sp)\n" + "ld s4, 136(sp)\n" + "ld s5, 144(sp)\n" + "ld s6, 152(sp)\n" + "ld s7, 160(sp)\n" + "ld s8, 168(sp)\n" + "ld s9, 176(sp)\n" + "ld s10, 184(sp)\n" + "ld s11, 192(sp)\n" + "ld ra, 200(sp)\n" + "addi sp, sp, 208\n" + "ret\n"; + + DriverStr(expected, "StackFrame"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, ChangeFrameSize) { + std::string expected; + + __ IncreaseFrameSize(128); + expected += "addi sp, sp, -128\n"; + __ DecreaseFrameSize(128); + expected += "addi sp, sp, 128\n"; + + __ IncreaseFrameSize(0); // No-op + __ DecreaseFrameSize(0); // No-op + + __ IncreaseFrameSize(2048); + expected += "addi sp, sp, -2048\n"; + __ DecreaseFrameSize(2048); + expected += "addi t6, sp, 2047\n" + "addi sp, t6, 1\n"; + + __ IncreaseFrameSize(4096); + expected += "addi t6, sp, -2048\n" + "addi sp, t6, -2048\n"; + __ 
DecreaseFrameSize(4096); + expected += "lui t6, 1\n" + "add sp, sp, t6\n"; + + __ IncreaseFrameSize(6 * KB); + expected += "addi t6, zero, -3\n" + "slli t6, t6, 11\n" + "add sp, sp, t6\n"; + __ DecreaseFrameSize(6 * KB); + expected += "addi t6, zero, 3\n" + "slli t6, t6, 11\n" + "add sp, sp, t6\n"; + + __ IncreaseFrameSize(6 * KB + 16); + expected += "lui t6, 0xffffe\n" + "addiw t6, t6, 2048-16\n" + "add sp, sp, t6\n"; + __ DecreaseFrameSize(6 * KB + 16); + expected += "lui t6, 2\n" + "addiw t6, t6, 16-2048\n" + "add sp, sp, t6\n"; + + DriverStr(expected, "ChangeFrameSize"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Store) { + std::string expected; + + __ Store(FrameOffset(0), AsManaged(A0), kWordSize); + expected += "sw a0, 0(sp)\n"; + __ Store(FrameOffset(2048), AsManaged(S0), kDoubleWordSize); + expected += "addi t6, sp, 0x7f8\n" + "sd s0, 8(t6)\n"; + + __ Store(AsManaged(A1), MemberOffset(256), AsManaged(S2), kDoubleWordSize); + expected += "sd s2, 256(a1)\n"; + __ Store(AsManaged(S3), MemberOffset(4 * KB), AsManaged(T1), kWordSize); + expected += "lui t6, 1\n" + "add t6, t6, s3\n" + "sw t1, 0(t6)\n"; + + __ Store(AsManaged(A3), MemberOffset(384), AsManaged(FA5), kDoubleWordSize); + expected += "fsd fa5, 384(a3)\n"; + __ Store(AsManaged(S4), MemberOffset(4 * KB + 16), AsManaged(FT10), kWordSize); + expected += "lui t6, 1\n" + "add t6, t6, s4\n" + "fsw ft10, 16(t6)\n"; + + __ StoreRawPtr(FrameOffset(128), AsManaged(A7)); + expected += "sd a7, 128(sp)\n"; + __ StoreRawPtr(FrameOffset(6 * KB), AsManaged(S11)); + expected += "lui t6, 2\n" + "add t6, t6, sp\n" + "sd s11, -2048(t6)\n"; + + __ StoreStackPointerToThread(ThreadOffset64(512), /*tag_sp=*/ false); + expected += "sd sp, 512(s1)\n"; + __ StoreStackPointerToThread(ThreadOffset64(3 * KB), /*tag_sp=*/ true); + expected += "ori t6, sp, 0x2\n" + "addi t5, s1, 0x7f8\n" + "sd t6, 0x408(t5)\n"; + + DriverStr(expected, "Store"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Load) { + std::string expected; + + __ Load(AsManaged(A0), FrameOffset(0), kWordSize); + expected += "lw a0, 0(sp)\n"; + __ Load(AsManaged(S0), FrameOffset(2048), kDoubleWordSize); + expected += "addi t6, sp, 0x7f8\n" + "ld s0, 8(t6)\n"; + + __ Load(AsManaged(S2), AsManaged(A1), MemberOffset(256), kDoubleWordSize); + expected += "ld s2, 256(a1)\n"; + __ Load(AsManaged(T1), AsManaged(S3), MemberOffset(4 * KB), kWordSize); + expected += "lui t6, 1\n" + "add t6, t6, s3\n" + "lw t1, 0(t6)\n"; + + __ Load(AsManaged(FA5), AsManaged(A3), MemberOffset(384), kDoubleWordSize); + expected += "fld fa5, 384(a3)\n"; + __ Load(AsManaged(FT10), AsManaged(S4), MemberOffset(4 * KB + 16), kWordSize); + expected += "lui t6, 1\n" + "add t6, t6, s4\n" + "flw ft10, 16(t6)\n"; + + __ LoadRawPtrFromThread(AsManaged(A7), ThreadOffset64(512)); + expected += "ld a7, 512(s1)\n"; + __ LoadRawPtrFromThread(AsManaged(S11), ThreadOffset64(3 * KB)); + expected += "addi t6, s1, 0x7f8\n" + "ld s11, 0x408(t6)\n"; + + __ LoadGcRootWithoutReadBarrier(AsManaged(T0), AsManaged(A0), MemberOffset(0)); + expected += "lwu t0, 0(a0)\n"; + __ LoadGcRootWithoutReadBarrier(AsManaged(T1), AsManaged(S2), MemberOffset(0x800)); + expected += "addi t6, s2, 0x7f8\n" + "lwu t1, 8(t6)\n"; + + DriverStr(expected, "Load"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, CreateJObject) { + std::string expected; + + __ CreateJObject(AsManaged(A0), FrameOffset(8), AsManaged(A0), /*null_allowed=*/ true); + expected += "beqz a0, 1f\n" + "addi a0, sp, 8\n" + "1:\n"; + __ CreateJObject(AsManaged(A1), FrameOffset(12), AsManaged(A1), 
/*null_allowed=*/ false); + expected += "addi a1, sp, 12\n"; + __ CreateJObject(AsManaged(A2), FrameOffset(16), AsManaged(A3), /*null_allowed=*/ true); + expected += "li a2, 0\n" + "beqz a3, 2f\n" + "addi a2, sp, 16\n" + "2:\n"; + __ CreateJObject(AsManaged(A4), FrameOffset(2048), AsManaged(A5), /*null_allowed=*/ false); + expected += "addi t6, sp, 2047\n" + "addi a4, t6, 1\n"; + + DriverStr(expected, "CreateJObject"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, MoveArguments) { + std::string expected; + + static constexpr FrameOffset kInvalidReferenceOffset = + JNIMacroAssembler<kArmPointerSize>::kInvalidReferenceOffset; + static constexpr size_t kNativePointerSize = static_cast<size_t>(kRiscv64PointerSize); + static constexpr size_t kFloatSize = 4u; + static constexpr size_t kXlenInBytes = 8u; // Used for integral args and `double`. + + // Normal or @FastNative static with parameters "LIJIJILJI". + // Note: This shall not spill references to the stack. The JNI compiler spills + // references in an separate initial pass before moving arguments and creating `jobject`s. + ArgumentLocation move_dests1[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kNativePointerSize), // `jclass` + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kNativePointerSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes), + ArgumentLocation(FrameOffset(0), kNativePointerSize), + ArgumentLocation(FrameOffset(8), kXlenInBytes), + ArgumentLocation(FrameOffset(16), kXlenInBytes), + }; + ArgumentLocation move_srcs1[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kNativePointerSize), // `jclass` + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize), + ArgumentLocation(FrameOffset(76), 2 * kVRegSize), + ArgumentLocation(FrameOffset(84), kVRegSize), + }; + FrameOffset move_refs1[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(40), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(72), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests1), + ArrayRef<ArgumentLocation>(move_srcs1), + ArrayRef<FrameOffset>(move_refs1)); + expected += "beqz a7, 1f\n" + "addi a7, sp, 72\n" + "1:\n" + "sd a7, 0(sp)\n" + "ld t6, 76(sp)\n" + "sd t6, 8(sp)\n" + "lw t6, 84(sp)\n" + "sd t6, 16(sp)\n" + "mv a7, a6\n" + "mv a6, a5\n" + "mv a5, a4\n" + "mv a4, a3\n" + "mv a3, a2\n" + "li a2, 0\n" + "beqz a1, 2f\n" + "add a2, sp, 40\n" + "2:\n" + "mv a1, a0\n"; + + // Normal or @FastNative static with parameters "LIJIJILJI" - spill references. 
+ ArgumentLocation move_dests1_spill_refs[] = { + ArgumentLocation(FrameOffset(40), kVRegSize), + ArgumentLocation(FrameOffset(72), kVRegSize), + }; + ArgumentLocation move_srcs1_spill_refs[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize), + }; + FrameOffset move_refs1_spill_refs[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests1_spill_refs), + ArrayRef<ArgumentLocation>(move_srcs1_spill_refs), + ArrayRef<FrameOffset>(move_refs1_spill_refs)); + expected += "sw a1, 40(sp)\n" + "sw a7, 72(sp)\n"; + + // Normal or @FastNative with parameters "LLIJIJIJLI" (first is `this`). + // Note: This shall not spill references to the stack. The JNI compiler spills + // references in an separate initial pass before moving arguments and creating `jobject`s. + ArgumentLocation move_dests2[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kNativePointerSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kNativePointerSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes), + ArgumentLocation(FrameOffset(0), kXlenInBytes), + ArgumentLocation(FrameOffset(8), kNativePointerSize), + ArgumentLocation(FrameOffset(16), kXlenInBytes), + }; + ArgumentLocation move_srcs2[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize), + ArgumentLocation(FrameOffset(76), 2 * kVRegSize), + ArgumentLocation(FrameOffset(84), kVRegSize), + ArgumentLocation(FrameOffset(88), kVRegSize), + }; + FrameOffset move_refs2[] { + FrameOffset(40), + FrameOffset(44), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(84), + FrameOffset(kInvalidReferenceOffset), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests2), + ArrayRef<ArgumentLocation>(move_srcs2), + ArrayRef<FrameOffset>(move_refs2)); + // Args in A1-A7 do not move but references are converted to `jobject`. + expected += "addi a1, sp, 40\n" + "beqz a2, 1f\n" + "addi a2, sp, 44\n" + "1:\n" + "ld t6, 76(sp)\n" + "sd t6, 0(sp)\n" + "lwu t6, 84(sp)\n" + "beqz t6, 2f\n" + "addi t6, sp, 84\n" + "2:\n" + "sd t6, 8(sp)\n" + "lw t6, 88(sp)\n" + "sd t6, 16(sp)\n"; + + // Normal or @FastNative static with parameters "FDFDFDFDFDIJIJIJL". 
+ ArgumentLocation move_dests3[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kNativePointerSize), // `jclass` + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes), + ArgumentLocation(FrameOffset(0), kXlenInBytes), + ArgumentLocation(FrameOffset(8), kXlenInBytes), + ArgumentLocation(FrameOffset(16), kNativePointerSize), + }; + ArgumentLocation move_srcs3[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kNativePointerSize), // `jclass` + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), 2 * kVRegSize), + ArgumentLocation(FrameOffset(88), kVRegSize), + ArgumentLocation(FrameOffset(92), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kVRegSize), + }; + FrameOffset move_refs3[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(88), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests3), + 
ArrayRef<ArgumentLocation>(move_srcs3), + ArrayRef<FrameOffset>(move_refs3)); + // FP args in FA0-FA7 do not move. + expected += "sd a5, 0(sp)\n" + "sd a6, 8(sp)\n" + "beqz a7, 1f\n" + "addi a7, sp, 88\n" + "1:\n" + "sd a7, 16(sp)\n" + "mv a5, a2\n" + "mv a6, a3\n" + "mv a7, a4\n" + "lw a2, 88(sp)\n" + "ld a3, 92(sp)\n" + "mv a4, a1\n" + "mv a1, a0\n"; + + // @CriticalNative with parameters "DFDFDFDFIDJIJFDIIJ". + ArgumentLocation move_dests4[] = { + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kFloatSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes), + ArgumentLocation(FrameOffset(0), kXlenInBytes), + ArgumentLocation(FrameOffset(8), kXlenInBytes), + }; + ArgumentLocation move_srcs4[] = { + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA0), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA1), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA2), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA3), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA4), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA5), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA6), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromFRegister(FA7), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kVRegSize), + ArgumentLocation(FrameOffset(92), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), 2 * kVRegSize), + ArgumentLocation(FrameOffset(112), kVRegSize), + ArgumentLocation(FrameOffset(116), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), 2 * kVRegSize), + }; + FrameOffset move_refs4[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + 
FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests4), + ArrayRef<ArgumentLocation>(move_srcs4), + ArrayRef<FrameOffset>(move_refs4)); + // FP args in FA0-FA7 and integral args in A2-A4 do not move. + expected += "sd a6, 0(sp)\n" + "sd a7, 8(sp)\n" + "mv a0, a1\n" + "ld a1, 92(sp)\n" + "ld a6, 116(sp)\n" + "mv a7, a5\n" + "lw a5, 112(sp)\n"; + + // @CriticalNative with parameters "JIJIJIJIJI". + ArgumentLocation move_dests5[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A0), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kXlenInBytes), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), kXlenInBytes), + ArgumentLocation(FrameOffset(0), kXlenInBytes), + ArgumentLocation(FrameOffset(8), kXlenInBytes), + }; + ArgumentLocation move_srcs5[] = { + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A1), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A2), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A3), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A4), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A5), 2 * kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A6), kVRegSize), + ArgumentLocation(Riscv64ManagedRegister::FromXRegister(A7), 2 * kVRegSize), + ArgumentLocation(FrameOffset(84), kVRegSize), + ArgumentLocation(FrameOffset(88), 2 * kVRegSize), + ArgumentLocation(FrameOffset(96), kVRegSize), + }; + FrameOffset move_refs5[] { + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + FrameOffset(kInvalidReferenceOffset), + }; + __ MoveArguments(ArrayRef<ArgumentLocation>(move_dests5), + ArrayRef<ArgumentLocation>(move_srcs5), + ArrayRef<FrameOffset>(move_refs5)); + expected += "ld t6, 88(sp)\n" + "sd t6, 0(sp)\n" + "lw t6, 96(sp)\n" + "sd t6, 8(sp)\n" + "mv a0, a1\n" + "mv a1, a2\n" + "mv a2, a3\n" + "mv a3, a4\n" + "mv a4, a5\n" + "mv a5, a6\n" + "mv a6, a7\n" + "lw a7, 84(sp)\n"; + + DriverStr(expected, "MoveArguments"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Move) { + std::string expected; + + __ Move(AsManaged(A0), AsManaged(A1), kWordSize); + expected += "mv a0, a1\n"; + __ Move(AsManaged(A2), AsManaged(A3), kDoubleWordSize); + expected += "mv a2, a3\n"; + + __ Move(AsManaged(A4), AsManaged(A4), kWordSize); // No-op. + __ Move(AsManaged(A5), AsManaged(A5), kDoubleWordSize); // No-op. 
+ + DriverStr(expected, "Move"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, GetCurrentThread) { + std::string expected; + + __ GetCurrentThread(AsManaged(A0)); + expected += "mv a0, s1\n"; + + __ GetCurrentThread(FrameOffset(256)); + expected += "sd s1, 256(sp)\n"; + __ GetCurrentThread(FrameOffset(3 * KB)); + expected += "addi t6, sp, 0x7f8\n" + "sd s1, 0x408(t6)\n"; + + DriverStr(expected, "GetCurrentThread"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, DecodeJNITransitionOrLocalJObject) { + std::string expected; + + constexpr int64_t kGlobalOrWeakGlobalMask = IndirectReferenceTable::GetGlobalOrWeakGlobalMask(); + constexpr int64_t kIndirectRefKindMask = IndirectReferenceTable::GetIndirectRefKindMask(); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + __ DecodeJNITransitionOrLocalJObject(AsManaged(A0), slow_path.get(), resume.get()); + expected += "beqz a0, 1f\n" + "andi t6, a0, " + std::to_string(kGlobalOrWeakGlobalMask) + "\n" + "bnez t6, 2f\n" + "andi a0, a0, ~" + std::to_string(kIndirectRefKindMask) + "\n" + "lw a0, (a0)\n"; + + __ Bind(resume.get()); + expected += "1:\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "2:\n"; + + __ Jump(resume.get()); + expected += "j 1b\n"; + + DriverStr(expected, "DecodeJNITransitionOrLocalJObject"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, JumpCodePointer) { + std::string expected; + + __ Jump(AsManaged(A0), Offset(24)); + expected += "ld t6, 24(a0)\n" + "jr t6\n"; + + __ Jump(AsManaged(S2), Offset(2048)); + expected += "addi t6, s2, 0x7f8\n" + "ld t6, 8(t6)\n" + "jr t6\n"; + + DriverStr(expected, "JumpCodePointer"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Call) { + std::string expected; + + __ Call(AsManaged(A0), Offset(32)); + expected += "ld ra, 32(a0)\n" + "jalr ra\n"; + + __ Call(AsManaged(S2), Offset(2048)); + expected += "addi t6, s2, 0x7f8\n" + "ld ra, 8(t6)\n" + "jalr ra\n"; + + __ CallFromThread(ThreadOffset64(256)); + expected += "ld ra, 256(s1)\n" + "jalr ra\n"; + + __ CallFromThread(ThreadOffset64(3 * KB)); + expected += "addi t6, s1, 0x7f8\n" + "ld ra, 0x408(t6)\n" + "jalr ra\n"; + + DriverStr(expected, "Call"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Transitions) { + std::string expected; + + constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); + constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); + static_assert(kRunnableStateValue == 0u); + constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kRiscv64PointerSize>(); + static_assert(thread_flags_offset.SizeValue() == 0u); + constexpr size_t thread_held_mutex_mutator_lock_offset = + Thread::HeldMutexOffset<kRiscv64PointerSize>(kMutatorLock).SizeValue(); + constexpr size_t thread_mutator_lock_offset = + Thread::MutatorLockOffset<kRiscv64PointerSize>().SizeValue(); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + const ManagedRegister raw_scratch_regs[] = { AsManaged(T0), AsManaged(T1) }; + const ArrayRef<const ManagedRegister> scratch_regs(raw_scratch_regs); + + __ TryToTransitionFromRunnableToNative(slow_path.get(), scratch_regs); + expected += "1:\n" + "lr.w t0, (s1)\n" + "li t1, " + std::to_string(kNativeStateValue) + "\n" + "bnez t0, 4f\n" + "sc.w.rl t0, t1, (s1)\n" + "bnez t0, 1b\n" + "addi t6, s1, 0x7f8\n" + "sd x0, " + std::to_string(thread_held_mutex_mutator_lock_offset - 0x7f8u) + 
"(t6)\n"; + + __ TryToTransitionFromNativeToRunnable(slow_path.get(), scratch_regs, AsManaged(A0)); + expected += "2:\n" + "lr.w.aq t0, (s1)\n" + "li t1, " + std::to_string(kNativeStateValue) + "\n" + "bne t0, t1, 4f\n" + "sc.w t0, x0, (s1)\n" + "bnez t0, 2b\n" + "ld t0, " + std::to_string(thread_mutator_lock_offset) + "(s1)\n" + "addi t6, s1, 0x7f8\n" + "sd t0, " + std::to_string(thread_held_mutex_mutator_lock_offset - 0x7f8u) + "(t6)\n"; + + __ Bind(resume.get()); + expected += "3:\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "4:\n"; + + __ Jump(resume.get()); + expected += "j 3b"; + + DriverStr(expected, "SuspendCheck"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, SuspendCheck) { + std::string expected; + + ThreadOffset64 thread_flags_offet = Thread::ThreadFlagsOffset<kRiscv64PointerSize>(); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + __ SuspendCheck(slow_path.get()); + expected += "lw t6, " + std::to_string(thread_flags_offet.Int32Value()) + "(s1)\n" + "andi t6, t6, " + std::to_string(Thread::SuspendOrCheckpointRequestFlags()) + "\n" + "bnez t6, 2f\n"; + + __ Bind(resume.get()); + expected += "1:\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "2:\n"; + + __ Jump(resume.get()); + expected += "j 1b"; + + DriverStr(expected, "SuspendCheck"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, Exception) { + std::string expected; + + ThreadOffset64 exception_offset = Thread::ExceptionOffset<kArm64PointerSize>(); + ThreadOffset64 deliver_offset = QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, pDeliverException); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + + __ ExceptionPoll(slow_path.get()); + expected += "ld t6, " + std::to_string(exception_offset.Int32Value()) + "(s1)\n" + "bnez t6, 1f\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "1:\n"; + + __ DeliverPendingException(); + expected += "ld a0, " + std::to_string(exception_offset.Int32Value()) + "(s1)\n" + "ld ra, " + std::to_string(deliver_offset.Int32Value()) + "(s1)\n" + "jalr ra\n" + "unimp\n"; + + DriverStr(expected, "Exception"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, JumpLabel) { + std::string expected; + + std::unique_ptr<JNIMacroLabel> target = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> back = __ CreateLabel(); + + __ Jump(target.get()); + expected += "j 2f\n"; + + __ Bind(back.get()); + expected += "1:\n"; + + __ Move(AsManaged(A0), AsManaged(A1), static_cast<size_t>(kRiscv64PointerSize)); + expected += "mv a0, a1\n"; + + __ Bind(target.get()); + expected += "2:\n"; + + __ Jump(back.get()); + expected += "j 1b\n"; + + DriverStr(expected, "JumpLabel"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, ReadBarrier) { + std::string expected; + + ThreadOffset64 is_gc_marking_offset = Thread::IsGcMarkingOffset<kRiscv64PointerSize>(); + MemberOffset monitor_offset = mirror::Object::MonitorOffset(); + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + __ TestGcMarking(slow_path.get(), JNIMacroUnaryCondition::kNotZero); + expected += "lw t6, " + std::to_string(is_gc_marking_offset.Int32Value()) + "(s1)\n" + "bnez t6, 2f\n"; + + __ TestGcMarking(slow_path.get(), JNIMacroUnaryCondition::kZero); + expected += "lw t6, " + std::to_string(is_gc_marking_offset.Int32Value()) + "(s1)\n" + "beqz t6, 2f\n"; + + __ Bind(resume.get()); + expected += "1:\n"; + + expected += EmitRet(); + + __ 
Bind(slow_path.get()); + expected += "2:\n"; + + __ TestMarkBit(AsManaged(A0), resume.get(), JNIMacroUnaryCondition::kNotZero); + expected += "lw t6, " + std::to_string(monitor_offset.Int32Value()) + "(a0)\n" + "slliw t6, t6, " + std::to_string(31 - LockWord::kMarkBitStateShift) + "\n" + "bltz t6, 1b\n"; + + __ TestMarkBit(AsManaged(T0), resume.get(), JNIMacroUnaryCondition::kZero); + expected += "lw t6, " + std::to_string(monitor_offset.Int32Value()) + "(t0)\n" + "slliw t6, t6, " + std::to_string(31 - LockWord::kMarkBitStateShift) + "\n" + "bgez t6, 1b\n"; + + DriverStr(expected, "ReadBarrier"); +} + +TEST_F(JniMacroAssemblerRiscv64Test, TestByteAndJumpIfNotZero) { + // Note: The `TestByteAndJumpIfNotZero()` takes the address as a `uintptr_t`. + // Use 32-bit addresses, so that we can include this test in 32-bit host tests. + + std::string expected; + + std::unique_ptr<JNIMacroLabel> slow_path = __ CreateLabel(); + std::unique_ptr<JNIMacroLabel> resume = __ CreateLabel(); + + __ TestByteAndJumpIfNotZero(0x12345678u, slow_path.get()); + expected += "lui t6, 0x12345\n" + "lb t6, 0x678(t6)\n" + "bnez t6, 2f\n"; + + __ TestByteAndJumpIfNotZero(0x87654321u, slow_path.get()); + expected += "lui t6, 0x87654/4\n" + "slli t6, t6, 2\n" + "lb t6, 0x321(t6)\n" + "bnez t6, 2f\n"; + + __ Bind(resume.get()); + expected += "1:\n"; + + expected += EmitRet(); + + __ Bind(slow_path.get()); + expected += "2:\n"; + + __ TestByteAndJumpIfNotZero(0x456789abu, resume.get()); + expected += "lui t6, 0x45678+1\n" + "lb t6, 0x9ab-0x1000(t6)\n" + "bnez t6, 1b\n"; + + DriverStr(expected, "TestByteAndJumpIfNotZero"); +} + +#undef __ + +} // namespace riscv64 +} // namespace art diff --git a/compiler/utils/riscv64/managed_register_riscv64.cc b/compiler/utils/riscv64/managed_register_riscv64.cc index 560019ae09..99bd4be784 100644 --- a/compiler/utils/riscv64/managed_register_riscv64.cc +++ b/compiler/utils/riscv64/managed_register_riscv64.cc @@ -18,7 +18,7 @@ #include "base/globals.h" -namespace art { +namespace art HIDDEN { namespace riscv64 { bool Riscv64ManagedRegister::Overlaps(const Riscv64ManagedRegister& other) const { diff --git a/compiler/utils/riscv64/managed_register_riscv64.h b/compiler/utils/riscv64/managed_register_riscv64.h index 8e02a9dcc8..622d766945 100644 --- a/compiler/utils/riscv64/managed_register_riscv64.h +++ b/compiler/utils/riscv64/managed_register_riscv64.h @@ -24,7 +24,7 @@ #include "base/macros.h" #include "utils/managed_register.h" -namespace art { +namespace art HIDDEN { namespace riscv64 { const int kNumberOfXRegIds = kNumberOfXRegisters; diff --git a/compiler/utils/riscv64/managed_register_riscv64_test.cc b/compiler/utils/riscv64/managed_register_riscv64_test.cc index c6ad2dc38a..d7012a796a 100644 --- a/compiler/utils/riscv64/managed_register_riscv64_test.cc +++ b/compiler/utils/riscv64/managed_register_riscv64_test.cc @@ -19,7 +19,7 @@ #include "base/globals.h" #include "gtest/gtest.h" -namespace art { +namespace art HIDDEN { namespace riscv64 { TEST(Riscv64ManagedRegister, NoRegister) { diff --git a/compiler/utils/stack_checks.h b/compiler/utils/stack_checks.h index d0fff73df3..1be4532f3e 100644 --- a/compiler/utils/stack_checks.h +++ b/compiler/utils/stack_checks.h @@ -35,7 +35,7 @@ static constexpr size_t kSmallFrameSize = 1 * KB; // stack overflow check on method entry. // // A frame is considered large when it's above kLargeFrameSize. 
-static inline bool FrameNeedsStackCheck(size_t size, InstructionSet isa ATTRIBUTE_UNUSED) { +static inline bool FrameNeedsStackCheck(size_t size, [[maybe_unused]] InstructionSet isa) { return size >= kLargeFrameSize; } diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 0f7854dc5c..e6503045fa 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -955,6 +955,12 @@ class X86Assembler final : public Assembler { lock()->xaddl(address, reg); } + void rdtsc() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x31); + } + // // Misc. functionality // diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 5da6f04402..432322aea7 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -89,19 +89,7 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, addresses_.push_back(x86::Address(x86::ESP, 987654321)); } - if (registers_.size() == 0) { - registers_.insert(end(registers_), - { - new x86::Register(x86::EAX), - new x86::Register(x86::EBX), - new x86::Register(x86::ECX), - new x86::Register(x86::EDX), - new x86::Register(x86::EBP), - new x86::Register(x86::ESP), - new x86::Register(x86::ESI), - new x86::Register(x86::EDI) - }); - + if (secondary_register_names_.empty()) { secondary_register_names_.emplace(x86::Register(x86::EAX), "ax"); secondary_register_names_.emplace(x86::Register(x86::EBX), "bx"); secondary_register_names_.emplace(x86::Register(x86::ECX), "cx"); @@ -121,38 +109,28 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, tertiary_register_names_.emplace(x86::Register(x86::ESI), "dh"); tertiary_register_names_.emplace(x86::Register(x86::EDI), "bh"); } - - if (fp_registers_.size() == 0) { - fp_registers_.insert(end(fp_registers_), - { - new x86::XmmRegister(x86::XMM0), - new x86::XmmRegister(x86::XMM1), - new x86::XmmRegister(x86::XMM2), - new x86::XmmRegister(x86::XMM3), - new x86::XmmRegister(x86::XMM4), - new x86::XmmRegister(x86::XMM5), - new x86::XmmRegister(x86::XMM6), - new x86::XmmRegister(x86::XMM7) - }); - } } void TearDown() override { AssemblerTest::TearDown(); - STLDeleteElements(®isters_); - STLDeleteElements(&fp_registers_); } std::vector<x86::Address> GetAddresses() override { return addresses_; } - std::vector<x86::Register*> GetRegisters() override { - return registers_; + ArrayRef<const x86::Register> GetRegisters() override { + static constexpr x86::Register kRegisters[] = { + x86::EAX, x86::EBX, x86::ECX, x86::EDX, x86::EBP, x86::ESP, x86::ESI, x86::EDI + }; + return ArrayRef<const x86::Register>(kRegisters); } - std::vector<x86::XmmRegister*> GetFPRegisters() override { - return fp_registers_; + ArrayRef<const x86::XmmRegister> GetFPRegisters() override { + static constexpr x86::XmmRegister kFPRegisters[] = { + x86::XMM0, x86::XMM1, x86::XMM2, x86::XMM3, x86::XMM4, x86::XMM5, x86::XMM6, x86::XMM7 + }; + return ArrayRef<const x86::XmmRegister>(kFPRegisters); } x86::Immediate CreateImmediate(int64_t imm_value) override { @@ -173,10 +151,8 @@ class AssemblerX86Test : public AssemblerTest<x86::X86Assembler, private: std::vector<x86::Address> addresses_; - std::vector<x86::Register*> registers_; std::map<x86::Register, std::string, X86RegisterCompare> secondary_register_names_; std::map<x86::Register, std::string, X86RegisterCompare> tertiary_register_names_; - std::vector<x86::XmmRegister*> fp_registers_; }; class AssemblerX86AVXTest 
: public AssemblerX86Test { @@ -267,28 +243,28 @@ TEST_F(AssemblerX86Test, RepeatAF) { TEST_F(AssemblerX86Test, PoplAllAddresses) { // Make sure all addressing modes combinations are tested at least once. std::vector<x86::Address> all_addresses; - for (x86::Register* base : GetRegisters()) { + for (x86::Register base : GetRegisters()) { // Base only. - all_addresses.push_back(x86::Address(*base, -1)); - all_addresses.push_back(x86::Address(*base, 0)); - all_addresses.push_back(x86::Address(*base, 1)); - all_addresses.push_back(x86::Address(*base, 123456789)); - for (x86::Register* index : GetRegisters()) { - if (*index == x86::ESP) { + all_addresses.push_back(x86::Address(base, -1)); + all_addresses.push_back(x86::Address(base, 0)); + all_addresses.push_back(x86::Address(base, 1)); + all_addresses.push_back(x86::Address(base, 123456789)); + for (x86::Register index : GetRegisters()) { + if (index == x86::ESP) { // Index cannot be ESP. continue; - } else if (*base == *index) { + } else if (base == index) { // Index only. - all_addresses.push_back(x86::Address(*index, TIMES_1, -1)); - all_addresses.push_back(x86::Address(*index, TIMES_2, 0)); - all_addresses.push_back(x86::Address(*index, TIMES_4, 1)); - all_addresses.push_back(x86::Address(*index, TIMES_8, 123456789)); + all_addresses.push_back(x86::Address(index, TIMES_1, -1)); + all_addresses.push_back(x86::Address(index, TIMES_2, 0)); + all_addresses.push_back(x86::Address(index, TIMES_4, 1)); + all_addresses.push_back(x86::Address(index, TIMES_8, 123456789)); } // Base and index. - all_addresses.push_back(x86::Address(*base, *index, TIMES_1, -1)); - all_addresses.push_back(x86::Address(*base, *index, TIMES_2, 0)); - all_addresses.push_back(x86::Address(*base, *index, TIMES_4, 1)); - all_addresses.push_back(x86::Address(*base, *index, TIMES_8, 123456789)); + all_addresses.push_back(x86::Address(base, index, TIMES_1, -1)); + all_addresses.push_back(x86::Address(base, index, TIMES_2, 0)); + all_addresses.push_back(x86::Address(base, index, TIMES_4, 1)); + all_addresses.push_back(x86::Address(base, index, TIMES_8, 123456789)); } } DriverStr(RepeatA(&x86::X86Assembler::popl, all_addresses, "popl {mem}"), "popq"); @@ -510,11 +486,11 @@ TEST_F(AssemblerX86Test, PopcntlAddress) { // Rorl only allows CL as the shift count. std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) { std::ostringstream str; - std::vector<x86::Register*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86::Register> registers = assembler_test->GetRegisters(); x86::Register shifter(x86::ECX); - for (auto reg : registers) { - assembler->rorl(*reg, shifter); - str << "rorl %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->rorl(reg, shifter); + str << "rorl %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -530,11 +506,11 @@ TEST_F(AssemblerX86Test, RorlImm) { // Roll only allows CL as the shift count. 
std::string roll_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) { std::ostringstream str; - std::vector<x86::Register*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86::Register> registers = assembler_test->GetRegisters(); x86::Register shifter(x86::ECX); - for (auto reg : registers) { - assembler->roll(*reg, shifter); - str << "roll %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->roll(reg, shifter); + str << "roll %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -1379,27 +1355,27 @@ TEST_F(AssemblerX86Test, AddressDisplaceBy) { for (int32_t disp0 : displacements) { // initial displacement for (int32_t disp : displacements) { // extra displacement - for (const x86::Register *reg : GetRegisters()) { + for (x86::Register reg : GetRegisters()) { // Test non-SIB addressing. - EXPECT_EQ(x86::Address::displace(x86::Address(*reg, disp0), disp), - x86::Address(*reg, disp0 + disp)); + EXPECT_EQ(x86::Address::displace(x86::Address(reg, disp0), disp), + x86::Address(reg, disp0 + disp)); // Test SIB addressing with EBP base. - if (*reg != x86::ESP) { + if (reg != x86::ESP) { for (ScaleFactor scale : scales) { - EXPECT_EQ(x86::Address::displace(x86::Address(*reg, scale, disp0), disp), - x86::Address(*reg, scale, disp0 + disp)); + EXPECT_EQ(x86::Address::displace(x86::Address(reg, scale, disp0), disp), + x86::Address(reg, scale, disp0 + disp)); } } // Test SIB addressing with different base. - for (const x86::Register *index : GetRegisters()) { - if (*index == x86::ESP) { + for (x86::Register index : GetRegisters()) { + if (index == x86::ESP) { continue; // Skip ESP as it cannot be used with this address constructor. } for (ScaleFactor scale : scales) { - EXPECT_EQ(x86::Address::displace(x86::Address(*reg, *index, scale, disp0), disp), - x86::Address(*reg, *index, scale, disp0 + disp)); + EXPECT_EQ(x86::Address::displace(x86::Address(reg, index, scale, disp0), disp), + x86::Address(reg, index, scale, disp0 + disp)); } } diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc index 154e50b4e4..dfdbc183f1 100644 --- a/compiler/utils/x86/jni_macro_assembler_x86.cc +++ b/compiler/utils/x86/jni_macro_assembler_x86.cc @@ -83,7 +83,7 @@ void X86JNIMacroAssembler::BuildFrame(size_t frame_size, void X86JNIMacroAssembler::RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> spill_regs, - bool may_suspend ATTRIBUTE_UNUSED) { + [[maybe_unused]] bool may_suspend) { CHECK_ALIGNED(frame_size, kNativeStackAlignment); cfi().RememberState(); // -kFramePointerSize for ArtMethod*. 
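[Editorial note, not part of the patch] The hunks above add a new X86Assembler::rdtsc() emitter (opcode 0F 31) without an accompanying test in this excerpt. A minimal check could follow the same GetAssembler()/DriverStr() pattern used by the surrounding assembler tests; the sketch below is illustrative only and assumes the AssemblerX86Test fixture from assembler_x86_test.cc.

// Illustrative sketch, not part of this change: exercise the new rdtsc() emitter.
TEST_F(AssemblerX86Test, Rdtsc) {
  GetAssembler()->rdtsc();        // Should emit the two-byte opcode 0F 31.
  DriverStr("rdtsc\n", "rdtsc");  // Compare against the reference assembler output.
}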
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 3fdf05bed9..e2a32a7337 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -5244,6 +5244,12 @@ void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) { EmitOperand(dst.LowBits(), src); } +void X86_64Assembler::rdtsc() { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0x31); +} + void X86_64Assembler::repne_scasb() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0xF2); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 235ea03e2b..cb62500bc9 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -964,6 +964,8 @@ class X86_64Assembler final : public Assembler { void popcntq(CpuRegister dst, CpuRegister src); void popcntq(CpuRegister dst, const Address& src); + void rdtsc(); + void rorl(CpuRegister reg, const Immediate& imm); void rorl(CpuRegister operand, CpuRegister shifter); void roll(CpuRegister reg, const Immediate& imm); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index a7c206afaa..e985441101 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -199,24 +199,7 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, addresses_.push_back(x86_64::Address(x86_64::CpuRegister(x86_64::R15), 123456789)); } - if (registers_.size() == 0) { - registers_.push_back(new x86_64::CpuRegister(x86_64::RAX)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RBX)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RCX)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RDX)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RBP)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RSP)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RSI)); - registers_.push_back(new x86_64::CpuRegister(x86_64::RDI)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R8)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R9)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R10)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R11)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R12)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R13)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R14)); - registers_.push_back(new x86_64::CpuRegister(x86_64::R15)); - + if (secondary_register_names_.empty()) { secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RAX), "eax"); secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RBX), "ebx"); secondary_register_names_.emplace(x86_64::CpuRegister(x86_64::RCX), "ecx"); @@ -267,42 +250,59 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R13), "r13b"); quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R14), "r14b"); quaternary_register_names_.emplace(x86_64::CpuRegister(x86_64::R15), "r15b"); - - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM0)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM1)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM2)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM3)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM4)); - 
fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM5)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM6)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM7)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM8)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM9)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM10)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM11)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM12)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM13)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM14)); - fp_registers_.push_back(new x86_64::XmmRegister(x86_64::XMM15)); } } void TearDown() override { AssemblerTest::TearDown(); - STLDeleteElements(®isters_); - STLDeleteElements(&fp_registers_); } std::vector<x86_64::Address> GetAddresses() override { return addresses_; } - std::vector<x86_64::CpuRegister*> GetRegisters() override { - return registers_; + ArrayRef<const x86_64::CpuRegister> GetRegisters() override { + static constexpr x86_64::CpuRegister kRegisters[] = { + x86_64::CpuRegister(x86_64::RAX), + x86_64::CpuRegister(x86_64::RBX), + x86_64::CpuRegister(x86_64::RCX), + x86_64::CpuRegister(x86_64::RDX), + x86_64::CpuRegister(x86_64::RBP), + x86_64::CpuRegister(x86_64::RSP), + x86_64::CpuRegister(x86_64::RSI), + x86_64::CpuRegister(x86_64::RDI), + x86_64::CpuRegister(x86_64::R8), + x86_64::CpuRegister(x86_64::R9), + x86_64::CpuRegister(x86_64::R10), + x86_64::CpuRegister(x86_64::R11), + x86_64::CpuRegister(x86_64::R12), + x86_64::CpuRegister(x86_64::R13), + x86_64::CpuRegister(x86_64::R14), + x86_64::CpuRegister(x86_64::R15), + }; + return ArrayRef<const x86_64::CpuRegister>(kRegisters); } - std::vector<x86_64::XmmRegister*> GetFPRegisters() override { - return fp_registers_; + ArrayRef<const x86_64::XmmRegister> GetFPRegisters() override { + static constexpr x86_64::XmmRegister kFPRegisters[] = { + x86_64::XmmRegister(x86_64::XMM0), + x86_64::XmmRegister(x86_64::XMM1), + x86_64::XmmRegister(x86_64::XMM2), + x86_64::XmmRegister(x86_64::XMM3), + x86_64::XmmRegister(x86_64::XMM4), + x86_64::XmmRegister(x86_64::XMM5), + x86_64::XmmRegister(x86_64::XMM6), + x86_64::XmmRegister(x86_64::XMM7), + x86_64::XmmRegister(x86_64::XMM8), + x86_64::XmmRegister(x86_64::XMM9), + x86_64::XmmRegister(x86_64::XMM10), + x86_64::XmmRegister(x86_64::XMM11), + x86_64::XmmRegister(x86_64::XMM12), + x86_64::XmmRegister(x86_64::XMM13), + x86_64::XmmRegister(x86_64::XMM14), + x86_64::XmmRegister(x86_64::XMM15), + }; + return ArrayRef<const x86_64::XmmRegister>(kFPRegisters); } x86_64::Immediate CreateImmediate(int64_t imm_value) override { @@ -328,11 +328,9 @@ class AssemblerX86_64Test : public AssemblerTest<x86_64::X86_64Assembler, private: std::vector<x86_64::Address> addresses_; - std::vector<x86_64::CpuRegister*> registers_; std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> secondary_register_names_; std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> tertiary_register_names_; std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> quaternary_register_names_; - std::vector<x86_64::XmmRegister*> fp_registers_; }; class AssemblerX86_64AVXTest : public AssemblerX86_64Test { @@ -515,28 +513,28 @@ TEST_F(AssemblerX86_64Test, Toolchain) { TEST_F(AssemblerX86_64Test, PopqAllAddresses) { // Make sure all addressing modes combinations are tested at least once. 
std::vector<x86_64::Address> all_addresses; - for (x86_64::CpuRegister* base : GetRegisters()) { + for (const x86_64::CpuRegister& base : GetRegisters()) { // Base only. - all_addresses.push_back(x86_64::Address(*base, -1)); - all_addresses.push_back(x86_64::Address(*base, 0)); - all_addresses.push_back(x86_64::Address(*base, 1)); - all_addresses.push_back(x86_64::Address(*base, 123456789)); - for (x86_64::CpuRegister* index : GetRegisters()) { - if (index->AsRegister() == x86_64::RSP) { + all_addresses.push_back(x86_64::Address(base, -1)); + all_addresses.push_back(x86_64::Address(base, 0)); + all_addresses.push_back(x86_64::Address(base, 1)); + all_addresses.push_back(x86_64::Address(base, 123456789)); + for (const x86_64::CpuRegister& index : GetRegisters()) { + if (index.AsRegister() == x86_64::RSP) { // Index cannot be RSP. continue; - } else if (base->AsRegister() == index->AsRegister()) { + } else if (base.AsRegister() == index.AsRegister()) { // Index only. - all_addresses.push_back(x86_64::Address(*index, TIMES_1, -1)); - all_addresses.push_back(x86_64::Address(*index, TIMES_2, 0)); - all_addresses.push_back(x86_64::Address(*index, TIMES_4, 1)); - all_addresses.push_back(x86_64::Address(*index, TIMES_8, 123456789)); + all_addresses.push_back(x86_64::Address(index, TIMES_1, -1)); + all_addresses.push_back(x86_64::Address(index, TIMES_2, 0)); + all_addresses.push_back(x86_64::Address(index, TIMES_4, 1)); + all_addresses.push_back(x86_64::Address(index, TIMES_8, 123456789)); } // Base and index. - all_addresses.push_back(x86_64::Address(*base, *index, TIMES_1, -1)); - all_addresses.push_back(x86_64::Address(*base, *index, TIMES_2, 0)); - all_addresses.push_back(x86_64::Address(*base, *index, TIMES_4, 1)); - all_addresses.push_back(x86_64::Address(*base, *index, TIMES_8, 123456789)); + all_addresses.push_back(x86_64::Address(base, index, TIMES_1, -1)); + all_addresses.push_back(x86_64::Address(base, index, TIMES_2, 0)); + all_addresses.push_back(x86_64::Address(base, index, TIMES_4, 1)); + all_addresses.push_back(x86_64::Address(base, index, TIMES_8, 123456789)); } } DriverStr(RepeatA(&x86_64::X86_64Assembler::popq, all_addresses, "popq {mem}"), "popq"); @@ -641,11 +639,11 @@ TEST_F(AssemblerX86_64Test, SublImm) { // Shll only allows CL as the shift count. std::string shll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->shll(*reg, shifter); - str << "shll %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->shll(reg, shifter); + str << "shll %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -662,11 +660,11 @@ TEST_F(AssemblerX86_64Test, ShllImm) { // Shlq only allows CL as the shift count. 
std::string shlq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->shlq(*reg, shifter); - str << "shlq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->shlq(reg, shifter); + str << "shlq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -683,11 +681,11 @@ TEST_F(AssemblerX86_64Test, ShlqImm) { // Shrl only allows CL as the shift count. std::string shrl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->shrl(*reg, shifter); - str << "shrl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->shrl(reg, shifter); + str << "shrl %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -703,11 +701,11 @@ TEST_F(AssemblerX86_64Test, ShrlImm) { // Shrq only allows CL as the shift count. std::string shrq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->shrq(*reg, shifter); - str << "shrq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->shrq(reg, shifter); + str << "shrq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -723,11 +721,11 @@ TEST_F(AssemblerX86_64Test, ShrqImm) { // Sarl only allows CL as the shift count. std::string sarl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->sarl(*reg, shifter); - str << "sarl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->sarl(reg, shifter); + str << "sarl %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -743,11 +741,11 @@ TEST_F(AssemblerX86_64Test, SarlImm) { // Sarq only allows CL as the shift count. 
std::string sarq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->sarq(*reg, shifter); - str << "sarq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->sarq(reg, shifter); + str << "sarq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -763,11 +761,11 @@ TEST_F(AssemblerX86_64Test, SarqImm) { // Rorl only allows CL as the shift count. std::string rorl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->rorl(*reg, shifter); - str << "rorl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->rorl(reg, shifter); + str << "rorl %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -783,11 +781,11 @@ TEST_F(AssemblerX86_64Test, RorlImm) { // Roll only allows CL as the shift count. std::string roll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->roll(*reg, shifter); - str << "roll %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->roll(reg, shifter); + str << "roll %cl, %" << assembler_test->GetSecondaryRegisterName(reg) << "\n"; } return str.str(); } @@ -803,11 +801,11 @@ TEST_F(AssemblerX86_64Test, RollImm) { // Rorq only allows CL as the shift count. std::string rorq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->rorq(*reg, shifter); - str << "rorq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->rorq(reg, shifter); + str << "rorq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -823,11 +821,11 @@ TEST_F(AssemblerX86_64Test, RorqImm) { // Rolq only allows CL as the shift count. 
std::string rolq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); x86_64::CpuRegister shifter(x86_64::RCX); - for (auto reg : registers) { - assembler->rolq(*reg, shifter); - str << "rolq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n"; + for (auto&& reg : registers) { + assembler->rolq(reg, shifter); + str << "rolq %cl, %" << assembler_test->GetRegisterName(reg) << "\n"; } return str.str(); } @@ -2135,7 +2133,7 @@ TEST_F(AssemblerX86_64Test, Psrldq) { "psrldq $2, %xmm15\n", "psrldqi"); } -std::string x87_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, +std::string x87_fn([[maybe_unused]] AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; @@ -2202,7 +2200,7 @@ TEST_F(AssemblerX86_64Test, RetImm) { "ret ${imm}", /*non-negative*/ true), "ret"); } -std::string ret_and_leave_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, +std::string ret_and_leave_fn([[maybe_unused]] AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) { std::ostringstream str; @@ -2375,13 +2373,13 @@ std::string setcc_test_fn(AssemblerX86_64Test::Base* assembler_test, std::string suffixes[15] = { "o", "no", "b", "ae", "e", "ne", "be", "a", "s", "ns", "pe", "po", "l", "ge", "le" }; - std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters(); + ArrayRef<const x86_64::CpuRegister> registers = assembler_test->GetRegisters(); std::ostringstream str; - for (auto reg : registers) { + for (auto&& reg : registers) { for (size_t i = 0; i < 15; ++i) { - assembler->setcc(static_cast<x86_64::Condition>(i), *reg); - str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(*reg) << "\n"; + assembler->setcc(static_cast<x86_64::Condition>(i), reg); + str << "set" << suffixes[i] << " %" << assembler_test->GetQuaternaryRegisterName(reg) << "\n"; } } @@ -2459,27 +2457,27 @@ TEST_F(AssemblerX86_64Test, AddressDisplaceBy) { for (int32_t disp0 : displacements) { // initial displacement for (int32_t disp : displacements) { // extra displacement - for (const x86_64::CpuRegister* reg : GetRegisters()) { + for (const x86_64::CpuRegister reg : GetRegisters()) { // Test non-SIB addressing. - EXPECT_EQ(x86_64::Address::displace(x86_64::Address(*reg, disp0), disp), - x86_64::Address(*reg, disp0 + disp)); + EXPECT_EQ(x86_64::Address::displace(x86_64::Address(reg, disp0), disp), + x86_64::Address(reg, disp0 + disp)); // Test SIB addressing with RBP base. - if (reg->AsRegister() != x86_64::RSP) { + if (reg.AsRegister() != x86_64::RSP) { for (ScaleFactor scale : scales) { - EXPECT_EQ(x86_64::Address::displace(x86_64::Address(*reg, scale, disp0), disp), - x86_64::Address(*reg, scale, disp0 + disp)); + EXPECT_EQ(x86_64::Address::displace(x86_64::Address(reg, scale, disp0), disp), + x86_64::Address(reg, scale, disp0 + disp)); } } // Test SIB addressing with different base. - for (const x86_64::CpuRegister* index : GetRegisters()) { - if (index->AsRegister() == x86_64::RSP) { + for (const x86_64::CpuRegister& index : GetRegisters()) { + if (index.AsRegister() == x86_64::RSP) { continue; // Skip RSP as it cannot be used with this address constructor. 
} for (ScaleFactor scale : scales) { - EXPECT_EQ(x86_64::Address::displace(x86_64::Address(*reg, *index, scale, disp0), disp), - x86_64::Address(*reg, *index, scale, disp0 + disp)); + EXPECT_EQ(x86_64::Address::displace(x86_64::Address(reg, index, scale, disp0), disp), + x86_64::Address(reg, index, scale, disp0 + disp)); } } @@ -2513,7 +2511,7 @@ static x86_64::X86_64ManagedRegister ManagedFromFpu(x86_64::FloatRegister r) { return x86_64::X86_64ManagedRegister::FromXmmRegister(r); } -std::string buildframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, +std::string buildframe_test_fn([[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test, x86_64::X86_64JNIMacroAssembler* assembler) { // TODO: more interesting spill registers / entry spills. @@ -2556,7 +2554,7 @@ TEST_F(JNIMacroAssemblerX86_64Test, BuildFrame) { DriverFn(&buildframe_test_fn, "BuildFrame"); } -std::string removeframe_test_fn(JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, +std::string removeframe_test_fn([[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test, x86_64::X86_64JNIMacroAssembler* assembler) { // TODO: more interesting spill registers / entry spills. @@ -2588,7 +2586,7 @@ TEST_F(JNIMacroAssemblerX86_64Test, RemoveFrame) { } std::string increaseframe_test_fn( - JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, + [[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test, x86_64::X86_64JNIMacroAssembler* assembler) { assembler->IncreaseFrameSize(0U); assembler->IncreaseFrameSize(kStackAlignment); @@ -2608,7 +2606,7 @@ TEST_F(JNIMacroAssemblerX86_64Test, IncreaseFrame) { } std::string decreaseframe_test_fn( - JNIMacroAssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED, + [[maybe_unused]] JNIMacroAssemblerX86_64Test::Base* assembler_test, x86_64::X86_64JNIMacroAssembler* assembler) { assembler->DecreaseFrameSize(0U); assembler->DecreaseFrameSize(kStackAlignment); diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc index 388845730e..e9e6dbdae7 100644 --- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc +++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc @@ -95,7 +95,7 @@ void X86_64JNIMacroAssembler::BuildFrame(size_t frame_size, void X86_64JNIMacroAssembler::RemoveFrame(size_t frame_size, ArrayRef<const ManagedRegister> spill_regs, - bool may_suspend ATTRIBUTE_UNUSED) { + [[maybe_unused]] bool may_suspend) { CHECK_ALIGNED(frame_size, kNativeStackAlignment); cfi().RememberState(); int gpr_count = 0; @@ -515,7 +515,7 @@ void X86_64JNIMacroAssembler::GetCurrentThread(FrameOffset offset) { } void X86_64JNIMacroAssembler::TryToTransitionFromRunnableToNative( - JNIMacroLabel* label, ArrayRef<const ManagedRegister> scratch_regs ATTRIBUTE_UNUSED) { + JNIMacroLabel* label, [[maybe_unused]] ArrayRef<const ManagedRegister> scratch_regs) { constexpr uint32_t kNativeStateValue = Thread::StoredThreadStateValue(ThreadState::kNative); constexpr uint32_t kRunnableStateValue = Thread::StoredThreadStateValue(ThreadState::kRunnable); constexpr ThreadOffset64 thread_flags_offset = Thread::ThreadFlagsOffset<kX86_64PointerSize>(); |
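[Editorial note, not part of the patch] A recurring edit in these hunks replaces ART's ATTRIBUTE_UNUSED macro, written after the parameter name, with the standard C++17 [[maybe_unused]] attribute written before the declaration. The following self-contained sketch illustrates the attribute with a hypothetical function rather than ART code.

#include <cstdio>

// Hypothetical example, not ART code: the parameter is only read in debug
// builds, so [[maybe_unused]] suppresses -Wunused-parameter in release builds
// without needing a project-specific macro.
void LogIfDebug([[maybe_unused]] const char* msg) {
#ifndef NDEBUG
  std::fprintf(stderr, "%s\n", msg);
#endif
}

int main() {
  LogIfDebug("hello");  // Compiles cleanly whether or not NDEBUG is defined.
  return 0;
}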