Diffstat (limited to 'compiler')
81 files changed, 3285 insertions, 1658 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index 88fdee0f10..cfce9f70ce 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -60,7 +60,9 @@ LIBART_COMPILER_SRC_FILES := \ dex/mir_method_info.cc \ dex/mir_optimization.cc \ dex/bb_optimizations.cc \ - dex/pass_driver_me.cc \ + dex/post_opt_passes.cc \ + dex/pass_driver_me_opts.cc \ + dex/pass_driver_me_post_opt.cc \ dex/frontend.cc \ dex/mir_graph.cc \ dex/mir_analysis.cc \ @@ -177,8 +179,10 @@ define build-libart-compiler LOCAL_CPP_EXTENSION := $(ART_CPP_EXTENSION) ifeq ($$(art_ndebug_or_debug),ndebug) LOCAL_MODULE := libart-compiler + LOCAL_SHARED_LIBRARIES += libart else # debug LOCAL_MODULE := libartd-compiler + LOCAL_SHARED_LIBRARIES += libartd endif LOCAL_MODULE_TAGS := optional @@ -200,32 +204,21 @@ $$(ENUM_OPERATOR_OUT_GEN): $$(GENERATED_SRC_DIR)/%_operator_out.cc : $(LOCAL_PAT LOCAL_CFLAGS := $$(LIBART_COMPILER_CFLAGS) include external/libcxx/libcxx.mk ifeq ($$(art_target_or_host),target) - LOCAL_CLANG := $(ART_TARGET_CLANG) - LOCAL_CFLAGS += $(ART_TARGET_CFLAGS) + $(call set-target-local-clang-vars) + $(call set-target-local-cflags-vars,$(2)) else # host LOCAL_CLANG := $(ART_HOST_CLANG) LOCAL_CFLAGS += $(ART_HOST_CFLAGS) + ifeq ($$(art_ndebug_or_debug),debug) + LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS) + else + LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS) + endif endif # TODO: clean up the compilers and remove this. LOCAL_CFLAGS += -Wno-unused-parameter - LOCAL_SHARED_LIBRARIES += liblog - ifeq ($$(art_ndebug_or_debug),debug) - ifeq ($$(art_target_or_host),target) - LOCAL_CFLAGS += $(ART_TARGET_DEBUG_CFLAGS) - else # host - LOCAL_CFLAGS += $(ART_HOST_DEBUG_CFLAGS) - endif - LOCAL_SHARED_LIBRARIES += libartd - else - ifeq ($$(art_target_or_host),target) - LOCAL_CFLAGS += $(ART_TARGET_NON_DEBUG_CFLAGS) - else # host - LOCAL_CFLAGS += $(ART_HOST_NON_DEBUG_CFLAGS) - endif - LOCAL_SHARED_LIBRARIES += libart - endif ifeq ($(ART_USE_PORTABLE_COMPILER),true) LOCAL_SHARED_LIBRARIES += libLLVM LOCAL_CFLAGS += -DART_USE_PORTABLE_COMPILER=1 diff --git a/compiler/dex/bb_optimizations.cc b/compiler/dex/bb_optimizations.cc index 8b5eba0f67..06e259a65f 100644 --- a/compiler/dex/bb_optimizations.cc +++ b/compiler/dex/bb_optimizations.cc @@ -26,83 +26,11 @@ namespace art { bool CodeLayout::Worker(const PassDataHolder* data) const { DCHECK(data != nullptr); const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); - CompilationUnit* cUnit = pass_me_data_holder->c_unit; - DCHECK(cUnit != nullptr); + CompilationUnit* c_unit = pass_me_data_holder->c_unit; + DCHECK(c_unit != nullptr); BasicBlock* bb = pass_me_data_holder->bb; DCHECK(bb != nullptr); - cUnit->mir_graph->LayoutBlocks(bb); - // No need of repeating, so just return false. - return false; -} - -/* - * SSATransformation pass implementation start. - */ -void SSATransformation::Start(const PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(cUnit != nullptr); - cUnit->mir_graph->SSATransformationStart(); -} - -bool SSATransformation::Worker(const PassDataHolder* data) const { - DCHECK(data != nullptr); - const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); - CompilationUnit* cUnit = pass_me_data_holder->c_unit; - DCHECK(cUnit != nullptr); - BasicBlock* bb = pass_me_data_holder->bb; - DCHECK(bb != nullptr); - cUnit->mir_graph->InsertPhiNodeOperands(bb); - // No need of repeating, so just return false. 
- return false; -} - -void SSATransformation::End(const PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(cUnit != nullptr); - cUnit->mir_graph->SSATransformationEnd(); -} - -/* - * ConstantPropagation pass implementation start - */ -bool ConstantPropagation::Worker(const PassDataHolder* data) const { - DCHECK(data != nullptr); - const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); - CompilationUnit* cUnit = pass_me_data_holder->c_unit; - DCHECK(cUnit != nullptr); - BasicBlock* bb = pass_me_data_holder->bb; - DCHECK(bb != nullptr); - cUnit->mir_graph->DoConstantPropagation(bb); - // No need of repeating, so just return false. - return false; -} - -/* - * MethodUseCount pass implementation start. - */ -bool MethodUseCount::Gate(const PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(cUnit != nullptr); - // First initialize the data. - cUnit->mir_graph->InitializeMethodUses(); - - // Now check if the pass is to be ignored. - bool res = ((cUnit->disable_opt & (1 << kPromoteRegs)) == 0); - - return res; -} - -bool MethodUseCount::Worker(const PassDataHolder* data) const { - DCHECK(data != nullptr); - const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); - CompilationUnit* cUnit = pass_me_data_holder->c_unit; - DCHECK(cUnit != nullptr); - BasicBlock* bb = pass_me_data_holder->bb; - DCHECK(bb != nullptr); - cUnit->mir_graph->CountUses(bb); + c_unit->mir_graph->LayoutBlocks(bb); // No need of repeating, so just return false. return false; } @@ -113,11 +41,11 @@ bool MethodUseCount::Worker(const PassDataHolder* data) const { bool BBCombine::Worker(const PassDataHolder* data) const { DCHECK(data != nullptr); const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); - CompilationUnit* cUnit = pass_me_data_holder->c_unit; - DCHECK(cUnit != nullptr); + CompilationUnit* c_unit = pass_me_data_holder->c_unit; + DCHECK(c_unit != nullptr); BasicBlock* bb = pass_me_data_holder->bb; DCHECK(bb != nullptr); - cUnit->mir_graph->CombineBlocks(bb); + c_unit->mir_graph->CombineBlocks(bb); // No need of repeating, so just return false. 
return false; @@ -128,15 +56,15 @@ bool BBCombine::Worker(const PassDataHolder* data) const { */ void BBOptimizations::Start(const PassDataHolder* data) const { DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(cUnit != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); /* * This pass has a different ordering depEnding on the suppress exception, * so do the pass here for now: * - Later, the Start should just change the ordering and we can move the extended * creation into the pass driver's main job with a new iterator */ - cUnit->mir_graph->BasicBlockOptimization(); + c_unit->mir_graph->BasicBlockOptimization(); } } // namespace art diff --git a/compiler/dex/bb_optimizations.h b/compiler/dex/bb_optimizations.h index 3a529f2096..00947902e7 100644 --- a/compiler/dex/bb_optimizations.h +++ b/compiler/dex/bb_optimizations.h @@ -119,7 +119,7 @@ class CallInlining : public PassME { */ class CodeLayout : public PassME { public: - CodeLayout() : PassME("CodeLayout", "2_post_layout_cfg") { + CodeLayout() : PassME("CodeLayout", kAllNodes, kOptimizationBasicBlockChange, "2_post_layout_cfg") { } void Start(const PassDataHolder* data) const { @@ -133,72 +133,6 @@ class CodeLayout : public PassME { }; /** - * @class SSATransformation - * @brief Perform an SSA representation pass on the CompilationUnit. - */ -class SSATransformation : public PassME { - public: - SSATransformation() : PassME("SSATransformation", kPreOrderDFSTraversal, "3_post_ssa_cfg") { - } - - bool Worker(const PassDataHolder* data) const; - - void Start(const PassDataHolder* data) const; - - void End(const PassDataHolder* data) const; -}; - -/** - * @class ConstantPropagation - * @brief Perform a constant propagation pass. - */ -class ConstantPropagation : public PassME { - public: - ConstantPropagation() : PassME("ConstantPropagation") { - } - - bool Worker(const PassDataHolder* data) const; - - void Start(const PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(cUnit != nullptr); - cUnit->mir_graph->InitializeConstantPropagation(); - } -}; - -/** - * @class InitRegLocations - * @brief Initialize Register Locations. - */ -class InitRegLocations : public PassME { - public: - InitRegLocations() : PassME("InitRegLocation", kNoNodes) { - } - - void Start(const PassDataHolder* data) const { - DCHECK(data != nullptr); - CompilationUnit* cUnit = down_cast<const PassMEDataHolder*>(data)->c_unit; - DCHECK(cUnit != nullptr); - cUnit->mir_graph->InitRegLocations(); - } -}; - -/** - * @class MethodUseCount - * @brief Count the register uses of the method - */ -class MethodUseCount : public PassME { - public: - MethodUseCount() : PassME("UseCount") { - } - - bool Worker(const PassDataHolder* data) const; - - bool Gate(const PassDataHolder* data) const; -}; - -/** * @class NullCheckEliminationAndTypeInference * @brief Null check elimination and type inference. 
*/ diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index 767ffbf432..eb48cc3783 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -25,6 +25,7 @@ enum RegisterClass { kInvalidRegClass, kCoreReg, kFPReg, + kRefReg, kAnyReg, }; diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h index 35d777ec7a..66fb608d39 100644 --- a/compiler/dex/compiler_ir.h +++ b/compiler/dex/compiler_ir.h @@ -88,6 +88,7 @@ struct CompilationUnit { std::unique_ptr<MIRGraph> mir_graph; // MIR container. std::unique_ptr<Backend> cg; // Target-specific codegen. TimingLogger timings; + bool print_pass; // Do we want to print a pass or not? }; } // namespace art diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index c3f694da50..58d2ed2f0d 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -21,7 +21,7 @@ #include "dataflow_iterator-inl.h" #include "leb128.h" #include "mirror/object.h" -#include "pass_driver_me.h" +#include "pass_driver_me_opts.h" #include "runtime.h" #include "base/logging.h" #include "base/timing_logger.h" @@ -105,7 +105,8 @@ CompilationUnit::CompilationUnit(ArenaPool* pool) arena_stack(pool), mir_graph(nullptr), cg(nullptr), - timings("QuickCompiler", true, false) { + timings("QuickCompiler", true, false), + print_pass(false) { } CompilationUnit::~CompilationUnit() { @@ -133,39 +134,133 @@ void CompilationUnit::EndTiming() { } } +// Enable opcodes that mostly work, but produce assertion errors (thus breaking libartd.so). +#define ARM64_USE_EXPERIMENTAL_OPCODES 0 + // TODO: Remove this when we are able to compile everything. int arm64_support_list[] = { Instruction::NOP, Instruction::MOVE, Instruction::MOVE_FROM16, Instruction::MOVE_16, + Instruction::MOVE_EXCEPTION, + Instruction::RETURN_VOID, + Instruction::RETURN, + Instruction::RETURN_WIDE, + Instruction::CONST_4, + Instruction::CONST_16, + Instruction::CONST, + Instruction::CONST_STRING, + Instruction::MONITOR_ENTER, + Instruction::MONITOR_EXIT, + Instruction::THROW, + Instruction::GOTO, + Instruction::GOTO_16, + Instruction::GOTO_32, + Instruction::IF_EQ, + Instruction::IF_NE, + Instruction::IF_LT, + Instruction::IF_GE, + Instruction::IF_GT, + Instruction::IF_LE, + Instruction::IF_EQZ, + Instruction::IF_NEZ, + Instruction::IF_LTZ, + Instruction::IF_GEZ, + Instruction::IF_GTZ, + Instruction::IF_LEZ, + Instruction::NEG_INT, + Instruction::NOT_INT, + Instruction::NEG_FLOAT, + Instruction::INT_TO_BYTE, + Instruction::INT_TO_CHAR, + Instruction::INT_TO_SHORT, + Instruction::ADD_INT, + Instruction::SUB_INT, + Instruction::MUL_INT, + Instruction::DIV_INT, + Instruction::REM_INT, + Instruction::AND_INT, + Instruction::OR_INT, + Instruction::XOR_INT, + Instruction::SHL_INT, + Instruction::SHR_INT, + Instruction::USHR_INT, + Instruction::ADD_FLOAT, + Instruction::SUB_FLOAT, + Instruction::MUL_FLOAT, + Instruction::DIV_FLOAT, + Instruction::ADD_INT_2ADDR, + Instruction::SUB_INT_2ADDR, + Instruction::MUL_INT_2ADDR, + Instruction::DIV_INT_2ADDR, + Instruction::REM_INT_2ADDR, + Instruction::AND_INT_2ADDR, + Instruction::OR_INT_2ADDR, + Instruction::XOR_INT_2ADDR, + Instruction::SHL_INT_2ADDR, + Instruction::SHR_INT_2ADDR, + Instruction::USHR_INT_2ADDR, + Instruction::ADD_FLOAT_2ADDR, + Instruction::SUB_FLOAT_2ADDR, + Instruction::MUL_FLOAT_2ADDR, + Instruction::DIV_FLOAT_2ADDR, + Instruction::ADD_INT_LIT16, + Instruction::RSUB_INT, + Instruction::MUL_INT_LIT16, + Instruction::DIV_INT_LIT16, + Instruction::REM_INT_LIT16, + Instruction::AND_INT_LIT16, + 
Instruction::OR_INT_LIT16, + Instruction::XOR_INT_LIT16, + Instruction::ADD_INT_LIT8, + Instruction::RSUB_INT_LIT8, + Instruction::MUL_INT_LIT8, + Instruction::DIV_INT_LIT8, + Instruction::REM_INT_LIT8, + Instruction::AND_INT_LIT8, + Instruction::OR_INT_LIT8, + Instruction::XOR_INT_LIT8, + Instruction::SHL_INT_LIT8, + Instruction::SHR_INT_LIT8, + Instruction::USHR_INT_LIT8, + // TODO(Arm64): Enable compiler pass + // ----- ExtendedMIROpcode ----- + kMirOpPhi, + kMirOpCopy, + kMirOpFusedCmplFloat, + kMirOpFusedCmpgFloat, + kMirOpFusedCmplDouble, + kMirOpFusedCmpgDouble, + kMirOpFusedCmpLong, + kMirOpNop, + kMirOpNullCheck, + kMirOpRangeCheck, + kMirOpDivZeroCheck, + kMirOpCheck, + kMirOpCheckPart2, + kMirOpSelect, + +#if ARM64_USE_EXPERIMENTAL_OPCODES Instruction::MOVE_WIDE, Instruction::MOVE_WIDE_FROM16, Instruction::MOVE_WIDE_16, Instruction::MOVE_OBJECT, Instruction::MOVE_OBJECT_FROM16, Instruction::MOVE_OBJECT_16, + // Instruction::PACKED_SWITCH, + // Instruction::SPARSE_SWITCH, // Instruction::MOVE_RESULT, // Instruction::MOVE_RESULT_WIDE, // Instruction::MOVE_RESULT_OBJECT, - Instruction::MOVE_EXCEPTION, - Instruction::RETURN_VOID, - Instruction::RETURN, - Instruction::RETURN_WIDE, // Instruction::RETURN_OBJECT, - // Instruction::CONST_4, - // Instruction::CONST_16, - // Instruction::CONST, // Instruction::CONST_HIGH16, // Instruction::CONST_WIDE_16, // Instruction::CONST_WIDE_32, // Instruction::CONST_WIDE, // Instruction::CONST_WIDE_HIGH16, - // Instruction::CONST_STRING, // Instruction::CONST_STRING_JUMBO, // Instruction::CONST_CLASS, - Instruction::MONITOR_ENTER, - Instruction::MONITOR_EXIT, // Instruction::CHECK_CAST, // Instruction::INSTANCE_OF, // Instruction::ARRAY_LENGTH, @@ -174,29 +269,11 @@ int arm64_support_list[] = { // Instruction::FILLED_NEW_ARRAY, // Instruction::FILLED_NEW_ARRAY_RANGE, // Instruction::FILL_ARRAY_DATA, - Instruction::THROW, - // Instruction::GOTO, - // Instruction::GOTO_16, - // Instruction::GOTO_32, - // Instruction::PACKED_SWITCH, - // Instruction::SPARSE_SWITCH, Instruction::CMPL_FLOAT, Instruction::CMPG_FLOAT, Instruction::CMPL_DOUBLE, Instruction::CMPG_DOUBLE, Instruction::CMP_LONG, - // Instruction::IF_EQ, - // Instruction::IF_NE, - // Instruction::IF_LT, - // Instruction::IF_GE, - // Instruction::IF_GT, - // Instruction::IF_LE, - // Instruction::IF_EQZ, - // Instruction::IF_NEZ, - // Instruction::IF_LTZ, - // Instruction::IF_GEZ, - // Instruction::IF_GTZ, - // Instruction::IF_LEZ, // Instruction::UNUSED_3E, // Instruction::UNUSED_3F, // Instruction::UNUSED_40, @@ -258,11 +335,8 @@ int arm64_support_list[] = { // Instruction::INVOKE_INTERFACE_RANGE, // Instruction::UNUSED_79, // Instruction::UNUSED_7A, - Instruction::NEG_INT, - Instruction::NOT_INT, Instruction::NEG_LONG, Instruction::NOT_LONG, - Instruction::NEG_FLOAT, Instruction::NEG_DOUBLE, Instruction::INT_TO_LONG, Instruction::INT_TO_FLOAT, @@ -276,20 +350,6 @@ int arm64_support_list[] = { Instruction::DOUBLE_TO_INT, Instruction::DOUBLE_TO_LONG, Instruction::DOUBLE_TO_FLOAT, - Instruction::INT_TO_BYTE, - Instruction::INT_TO_CHAR, - Instruction::INT_TO_SHORT, - Instruction::ADD_INT, - Instruction::SUB_INT, - Instruction::MUL_INT, - Instruction::DIV_INT, - Instruction::REM_INT, - Instruction::AND_INT, - Instruction::OR_INT, - Instruction::XOR_INT, - Instruction::SHL_INT, - Instruction::SHR_INT, - Instruction::USHR_INT, Instruction::ADD_LONG, Instruction::SUB_LONG, Instruction::MUL_LONG, @@ -301,27 +361,12 @@ int arm64_support_list[] = { Instruction::SHL_LONG, Instruction::SHR_LONG, 
Instruction::USHR_LONG, - Instruction::ADD_FLOAT, - Instruction::SUB_FLOAT, - Instruction::MUL_FLOAT, - Instruction::DIV_FLOAT, // Instruction::REM_FLOAT, Instruction::ADD_DOUBLE, Instruction::SUB_DOUBLE, Instruction::MUL_DOUBLE, Instruction::DIV_DOUBLE, // Instruction::REM_DOUBLE, - Instruction::ADD_INT_2ADDR, - Instruction::SUB_INT_2ADDR, - Instruction::MUL_INT_2ADDR, - Instruction::DIV_INT_2ADDR, - Instruction::REM_INT_2ADDR, - Instruction::AND_INT_2ADDR, - Instruction::OR_INT_2ADDR, - Instruction::XOR_INT_2ADDR, - Instruction::SHL_INT_2ADDR, - Instruction::SHR_INT_2ADDR, - Instruction::USHR_INT_2ADDR, Instruction::ADD_LONG_2ADDR, Instruction::SUB_LONG_2ADDR, Instruction::MUL_LONG_2ADDR, @@ -333,35 +378,12 @@ int arm64_support_list[] = { Instruction::SHL_LONG_2ADDR, Instruction::SHR_LONG_2ADDR, Instruction::USHR_LONG_2ADDR, - Instruction::ADD_FLOAT_2ADDR, - Instruction::SUB_FLOAT_2ADDR, - Instruction::MUL_FLOAT_2ADDR, - Instruction::DIV_FLOAT_2ADDR, // Instruction::REM_FLOAT_2ADDR, Instruction::ADD_DOUBLE_2ADDR, Instruction::SUB_DOUBLE_2ADDR, Instruction::MUL_DOUBLE_2ADDR, Instruction::DIV_DOUBLE_2ADDR, // Instruction::REM_DOUBLE_2ADDR, - Instruction::ADD_INT_LIT16, - Instruction::RSUB_INT, - Instruction::MUL_INT_LIT16, - Instruction::DIV_INT_LIT16, - Instruction::REM_INT_LIT16, - Instruction::AND_INT_LIT16, - Instruction::OR_INT_LIT16, - Instruction::XOR_INT_LIT16, - Instruction::ADD_INT_LIT8, - Instruction::RSUB_INT_LIT8, - Instruction::MUL_INT_LIT8, - Instruction::DIV_INT_LIT8, - Instruction::REM_INT_LIT8, - Instruction::AND_INT_LIT8, - Instruction::OR_INT_LIT8, - Instruction::XOR_INT_LIT8, - Instruction::SHL_INT_LIT8, - Instruction::SHR_INT_LIT8, - Instruction::USHR_INT_LIT8, // Instruction::IGET_QUICK, // Instruction::IGET_WIDE_QUICK, // Instruction::IGET_OBJECT_QUICK, @@ -391,23 +413,7 @@ int arm64_support_list[] = { // Instruction::UNUSED_FD, // Instruction::UNUSED_FE, // Instruction::UNUSED_FF, - - // ----- ExtendedMIROpcode ----- - // kMirOpPhi, - // kMirOpCopy, - // kMirOpFusedCmplFloat, - // kMirOpFusedCmpgFloat, - // kMirOpFusedCmplDouble, - // kMirOpFusedCmpgDouble, - // kMirOpFusedCmpLong, - // kMirOpNop, - // kMirOpNullCheck, - // kMirOpRangeCheck, - kMirOpDivZeroCheck, - kMirOpCheck, - // kMirOpCheckPart2, - // kMirOpSelect, - // kMirOpLast, +#endif /* ARM64_USE_EXPERIMENTAL_OPCODES */ }; // TODO: Remove this when we are able to compile everything. @@ -749,7 +755,7 @@ static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, } for (int idx = 0; idx < cu.mir_graph->GetNumBlocks(); idx++) { - BasicBlock *bb = cu.mir_graph->GetBasicBlock(idx); + BasicBlock* bb = cu.mir_graph->GetBasicBlock(idx); if (bb == NULL) continue; if (bb->block_type == kDead) continue; for (MIR* mir = bb->first_mir_insn; mir != nullptr; mir = mir->next) { @@ -757,7 +763,7 @@ static bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file, // Check if we support the byte code. 
if (std::find(support_list, support_list + support_list_size, opcode) == support_list + support_list_size) { - if (opcode < kMirOpFirst) { + if (!cu.mir_graph->IsPseudoMirOp(opcode)) { VLOG(compiler) << "Unsupported dalvik byte code : " << mir->dalvikInsn.opcode; } else { @@ -925,7 +931,7 @@ static CompiledMethod* CompileMethod(CompilerDriver& driver, } /* Create the pass driver and launch it */ - PassDriverME pass_driver(&cu); + PassDriverMEOpts pass_driver(&cu); pass_driver.Launch(); if (cu.enable_debug & (1 << kDebugDumpCheckStats)) { diff --git a/compiler/dex/local_value_numbering.cc b/compiler/dex/local_value_numbering.cc index c0068b2331..62594963fc 100644 --- a/compiler/dex/local_value_numbering.cc +++ b/compiler/dex/local_value_numbering.cc @@ -21,8 +21,48 @@ namespace art { -uint16_t LocalValueNumbering::GetFieldId(const DexFile* dex_file, uint16_t field_idx) { - FieldReference key = { dex_file, field_idx }; +namespace { // anonymous namespace + +// Operations used for value map keys instead of actual opcode. +static constexpr uint16_t kInvokeMemoryVersionBumpOp = Instruction::INVOKE_DIRECT; +static constexpr uint16_t kUnresolvedSFieldOp = Instruction::SPUT; +static constexpr uint16_t kResolvedSFieldOp = Instruction::SGET; +static constexpr uint16_t kUnresolvedIFieldOp = Instruction::IPUT; +static constexpr uint16_t kNonAliasingIFieldOp = Instruction::IGET; +static constexpr uint16_t kAliasingIFieldOp = Instruction::IGET_WIDE; +static constexpr uint16_t kAliasingIFieldStartVersionOp = Instruction::IGET_WIDE; +static constexpr uint16_t kAliasingIFieldBumpVersionOp = Instruction::IGET_OBJECT; +static constexpr uint16_t kArrayAccessLocOp = Instruction::APUT; +static constexpr uint16_t kNonAliasingArrayOp = Instruction::AGET; +static constexpr uint16_t kNonAliasingArrayStartVersionOp = Instruction::AGET_WIDE; +static constexpr uint16_t kAliasingArrayOp = Instruction::AGET_OBJECT; +static constexpr uint16_t kAliasingArrayMemoryVersionOp = Instruction::AGET_BOOLEAN; +static constexpr uint16_t kAliasingArrayBumpVersionOp = Instruction::AGET_BYTE; + +} // anonymous namespace + +LocalValueNumbering::LocalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator) + : cu_(cu), + last_value_(0u), + sreg_value_map_(std::less<uint16_t>(), allocator->Adapter()), + sreg_wide_value_map_(std::less<uint16_t>(), allocator->Adapter()), + value_map_(std::less<uint64_t>(), allocator->Adapter()), + global_memory_version_(0u), + aliasing_ifield_version_map_(std::less<uint16_t>(), allocator->Adapter()), + non_aliasing_array_version_map_(std::less<uint16_t>(), allocator->Adapter()), + field_index_map_(FieldReferenceComparator(), allocator->Adapter()), + non_aliasing_refs_(std::less<uint16_t>(), allocator->Adapter()), + non_aliasing_ifields_(NonAliasingIFieldKeyComparator(), allocator->Adapter()), + escaped_array_refs_(EscapedArrayKeyComparator(), allocator->Adapter()), + range_checked_(RangeCheckKeyComparator() , allocator->Adapter()), + null_checked_(std::less<uint16_t>(), allocator->Adapter()) { + std::fill_n(unresolved_sfield_version_, kFieldTypeCount, 0u); + std::fill_n(unresolved_ifield_version_, kFieldTypeCount, 0u); + std::fill_n(aliasing_array_version_, kFieldTypeCount, 0u); +} + +uint16_t LocalValueNumbering::GetFieldId(const MirFieldInfo& field_info) { + FieldReference key = { field_info.DeclaringDexFile(), field_info.DeclaringFieldIndex() }; auto it = field_index_map_.find(key); if (it != field_index_map_.end()) { return it->second; @@ -32,62 +72,6 @@ uint16_t 
LocalValueNumbering::GetFieldId(const DexFile* dex_file, uint16_t field return id; } -void LocalValueNumbering::AdvanceGlobalMemory() { - // See AdvanceMemoryVersion() for explanation. - global_memory_version_ = next_memory_version_; - ++next_memory_version_; -} - -uint16_t LocalValueNumbering::GetMemoryVersion(uint16_t base, uint16_t field, uint16_t type) { - // See AdvanceMemoryVersion() for explanation. - MemoryVersionKey key = { base, field, type }; - MemoryVersionMap::iterator it = memory_version_map_.find(key); - uint16_t memory_version = (it != memory_version_map_.end()) ? it->second : 0u; - if (base != NO_VALUE && non_aliasing_refs_.find(base) == non_aliasing_refs_.end()) { - // Check modifications by potentially aliased access. - MemoryVersionKey aliased_access_key = { NO_VALUE, field, type }; - auto aa_it = memory_version_map_.find(aliased_access_key); - if (aa_it != memory_version_map_.end() && aa_it->second > memory_version) { - memory_version = aa_it->second; - } - memory_version = std::max(memory_version, global_memory_version_); - } else if (base != NO_VALUE) { - // Ignore global_memory_version_ for access via unique references. - } else { - memory_version = std::max(memory_version, global_memory_version_); - } - return memory_version; -}; - -uint16_t LocalValueNumbering::AdvanceMemoryVersion(uint16_t base, uint16_t field, uint16_t type) { - // When we read the same value from memory, we want to assign the same value name to it. - // However, we need to be careful not to assign the same value name if the memory location - // may have been written to between the reads. To avoid that we do "memory versioning". - // - // For each write to a memory location (instance field, static field, array element) we assign - // a new memory version number to the location identified by the value name of the base register, - // the field id and type, or "{ base, field, type }". For static fields the "base" is NO_VALUE - // since they are not accessed via a reference. For arrays the "field" is NO_VALUE since they - // don't have a field id. - // - // To account for the possibility of aliased access to the same memory location via different - // "base", we also store the memory version number with the key "{ NO_VALUE, field, type }" - // if "base" is an aliasing reference and check it in GetMemoryVersion() on reads via - // aliasing references. A global memory version is set for method calls as a method can - // potentially write to any memory location accessed via an aliasing reference. - - uint16_t result = next_memory_version_; - ++next_memory_version_; - MemoryVersionKey key = { base, field, type }; - memory_version_map_.Overwrite(key, result); - if (base != NO_VALUE && non_aliasing_refs_.find(base) == non_aliasing_refs_.end()) { - // Advance memory version for aliased access. 
- MemoryVersionKey aliased_access_key = { NO_VALUE, field, type }; - memory_version_map_.Overwrite(aliased_access_key, result); - } - return result; -}; - uint16_t LocalValueNumbering::MarkNonAliasingNonNull(MIR* mir) { uint16_t res = GetOperandValue(mir->ssa_rep->defs[0]); SetOperandValue(mir->ssa_rep->defs[0], res); @@ -97,43 +81,332 @@ uint16_t LocalValueNumbering::MarkNonAliasingNonNull(MIR* mir) { return res; } -void LocalValueNumbering::MakeArgsAliasing(MIR* mir) { - for (size_t i = 0u, count = mir->ssa_rep->num_uses; i != count; ++i) { - uint16_t reg = GetOperandValue(mir->ssa_rep->uses[i]); - non_aliasing_refs_.erase(reg); +bool LocalValueNumbering::IsNonAliasing(uint16_t reg) { + return non_aliasing_refs_.find(reg) != non_aliasing_refs_.end(); +} + +bool LocalValueNumbering::IsNonAliasingIField(uint16_t reg, uint16_t field_id, uint16_t type) { + if (IsNonAliasing(reg)) { + return true; + } + NonAliasingIFieldKey key = { reg, field_id, type }; + return non_aliasing_ifields_.count(key) != 0u; +} + +bool LocalValueNumbering::IsNonAliasingArray(uint16_t reg, uint16_t type) { + if (IsNonAliasing(reg)) { + return true; } + EscapedArrayKey key = { reg, type }; + return escaped_array_refs_.count(key) != 0u; } + void LocalValueNumbering::HandleNullCheck(MIR* mir, uint16_t reg) { - if (null_checked_.find(reg) != null_checked_.end()) { - if (cu_->verbose) { - LOG(INFO) << "Removing null check for 0x" << std::hex << mir->offset; + auto lb = null_checked_.lower_bound(reg); + if (lb != null_checked_.end() && *lb == reg) { + if (LIKELY(Good())) { + if (cu_->verbose) { + LOG(INFO) << "Removing null check for 0x" << std::hex << mir->offset; + } + mir->optimization_flags |= MIR_IGNORE_NULL_CHECK; } - mir->optimization_flags |= MIR_IGNORE_NULL_CHECK; } else { - null_checked_.insert(reg); + null_checked_.insert(lb, reg); } } void LocalValueNumbering::HandleRangeCheck(MIR* mir, uint16_t array, uint16_t index) { - if (ValueExists(ARRAY_REF, array, index, NO_VALUE)) { - if (cu_->verbose) { - LOG(INFO) << "Removing range check for 0x" << std::hex << mir->offset; + RangeCheckKey key = { array, index }; + auto lb = range_checked_.lower_bound(key); + if (lb != range_checked_.end() && !RangeCheckKeyComparator()(key, *lb)) { + if (LIKELY(Good())) { + if (cu_->verbose) { + LOG(INFO) << "Removing range check for 0x" << std::hex << mir->offset; + } + mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK; } - mir->optimization_flags |= MIR_IGNORE_RANGE_CHECK; + } else { + // Mark range check completed. + range_checked_.insert(lb, key); } - // Use side effect to note range check completed. - (void)LookupValue(ARRAY_REF, array, index, NO_VALUE); } void LocalValueNumbering::HandlePutObject(MIR* mir) { // If we're storing a non-aliasing reference, stop tracking it as non-aliasing now. 
uint16_t base = GetOperandValue(mir->ssa_rep->uses[0]); - non_aliasing_refs_.erase(base); + HandleEscapingRef(base); +} + +void LocalValueNumbering::HandleEscapingRef(uint16_t base) { + auto it = non_aliasing_refs_.find(base); + if (it != non_aliasing_refs_.end()) { + uint64_t iget_key = BuildKey(Instruction::IGET, base, 0u, 0u); + for (auto iget_it = value_map_.lower_bound(iget_key), iget_end = value_map_.end(); + iget_it != iget_end && EqualOpAndOperand1(iget_it->first, iget_key); ++iget_it) { + uint16_t field_id = ExtractOperand2(iget_it->first); + uint16_t type = ExtractModifier(iget_it->first); + NonAliasingIFieldKey key = { base, field_id, type }; + non_aliasing_ifields_.insert(key); + } + uint64_t aget_key = BuildKey(kNonAliasingArrayStartVersionOp, base, 0u, 0u); + auto aget_it = value_map_.lower_bound(aget_key); + if (aget_it != value_map_.end() && EqualOpAndOperand1(aget_key, aget_it->first)) { + DCHECK_EQ(ExtractOperand2(aget_it->first), kNoValue); + uint16_t type = ExtractModifier(aget_it->first); + EscapedArrayKey key = { base, type }; + escaped_array_refs_.insert(key); + } + non_aliasing_refs_.erase(it); + } +} + +uint16_t LocalValueNumbering::HandleAGet(MIR* mir, uint16_t opcode) { + // uint16_t type = opcode - Instruction::AGET; + uint16_t array = GetOperandValue(mir->ssa_rep->uses[0]); + HandleNullCheck(mir, array); + uint16_t index = GetOperandValue(mir->ssa_rep->uses[1]); + HandleRangeCheck(mir, array, index); + uint16_t type = opcode - Instruction::AGET; + // Establish value number for loaded register. + uint16_t res; + if (IsNonAliasingArray(array, type)) { + // Get the start version that accounts for aliasing within the array (different index names). + uint16_t start_version = LookupValue(kNonAliasingArrayStartVersionOp, array, kNoValue, type); + // Find the current version from the non_aliasing_array_version_map_. + uint16_t memory_version = start_version; + auto it = non_aliasing_array_version_map_.find(start_version); + if (it != non_aliasing_array_version_map_.end()) { + memory_version = it->second; + } else { + // Just use the start_version. + } + res = LookupValue(kNonAliasingArrayOp, array, index, memory_version); + } else { + // Get the memory version of aliased array accesses of this type. + uint16_t memory_version = LookupValue(kAliasingArrayMemoryVersionOp, global_memory_version_, + aliasing_array_version_[type], kNoValue); + res = LookupValue(kAliasingArrayOp, array, index, memory_version); + } + if (opcode == Instruction::AGET_WIDE) { + SetOperandValueWide(mir->ssa_rep->defs[0], res); + } else { + SetOperandValue(mir->ssa_rep->defs[0], res); + } + return res; +} + +void LocalValueNumbering::HandleAPut(MIR* mir, uint16_t opcode) { + int array_idx = (opcode == Instruction::APUT_WIDE) ? 2 : 1; + int index_idx = array_idx + 1; + uint16_t array = GetOperandValue(mir->ssa_rep->uses[array_idx]); + HandleNullCheck(mir, array); + uint16_t index = GetOperandValue(mir->ssa_rep->uses[index_idx]); + HandleRangeCheck(mir, array, index); + + uint16_t type = opcode - Instruction::APUT; + uint16_t value = (opcode == Instruction::APUT_WIDE) + ? GetOperandValueWide(mir->ssa_rep->uses[0]) + : GetOperandValue(mir->ssa_rep->uses[0]); + if (IsNonAliasing(array)) { + // Get the start version that accounts for aliasing within the array (different index values). 
+ uint16_t start_version = LookupValue(kNonAliasingArrayStartVersionOp, array, kNoValue, type); + auto it = non_aliasing_array_version_map_.find(start_version); + uint16_t memory_version = start_version; + if (it != non_aliasing_array_version_map_.end()) { + memory_version = it->second; + } + // We need to take 4 values (array, index, memory_version, value) into account for bumping + // the memory version but the key can take only 3. Merge array and index into a location. + uint16_t array_access_location = LookupValue(kArrayAccessLocOp, array, index, kNoValue); + // Bump the version, adding to the chain. + memory_version = LookupValue(kAliasingArrayBumpVersionOp, memory_version, + array_access_location, value); + non_aliasing_array_version_map_.Overwrite(start_version, memory_version); + StoreValue(kNonAliasingArrayOp, array, index, memory_version, value); + } else { + // Get the memory version based on global_memory_version_ and aliasing_array_version_[type]. + uint16_t memory_version = LookupValue(kAliasingArrayMemoryVersionOp, global_memory_version_, + aliasing_array_version_[type], kNoValue); + if (HasValue(kAliasingArrayOp, array, index, memory_version, value)) { + // This APUT can be eliminated, it stores the same value that's already in the field. + // TODO: Eliminate the APUT. + return; + } + // We need to take 4 values (array, index, memory_version, value) into account for bumping + // the memory version but the key can take only 3. Merge array and index into a location. + uint16_t array_access_location = LookupValue(kArrayAccessLocOp, array, index, kNoValue); + // Bump the version, adding to the chain. + uint16_t bumped_version = LookupValue(kAliasingArrayBumpVersionOp, memory_version, + array_access_location, value); + aliasing_array_version_[type] = bumped_version; + memory_version = LookupValue(kAliasingArrayMemoryVersionOp, global_memory_version_, + bumped_version, kNoValue); + StoreValue(kAliasingArrayOp, array, index, memory_version, value); + + // Clear escaped array refs for this type. + EscapedArrayKey array_key = { type, 0u }; + auto it = escaped_array_refs_.lower_bound(array_key), end = escaped_array_refs_.end(); + while (it != end && it->type == type) { + it = escaped_array_refs_.erase(it); + } + } +} + +uint16_t LocalValueNumbering::HandleIGet(MIR* mir, uint16_t opcode) { + uint16_t base = GetOperandValue(mir->ssa_rep->uses[0]); + HandleNullCheck(mir, base); + const MirFieldInfo& field_info = cu_->mir_graph->GetIFieldLoweringInfo(mir); + uint16_t res; + if (!field_info.IsResolved() || field_info.IsVolatile()) { + // Volatile fields always get a new memory version; field id is irrelevant. + // Unresolved fields may be volatile, so handle them as such to be safe. + // Use result s_reg - will be unique. + res = LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue); + } else { + uint16_t type = opcode - Instruction::IGET; + uint16_t field_id = GetFieldId(field_info); + if (IsNonAliasingIField(base, field_id, type)) { + res = LookupValue(kNonAliasingIFieldOp, base, field_id, type); + } else { + // Get the start version that accounts for aliasing with unresolved fields of the same type + // and make it unique for the field by including the field_id. + uint16_t start_version = LookupValue(kAliasingIFieldStartVersionOp, global_memory_version_, + unresolved_ifield_version_[type], field_id); + // Find the current version from the aliasing_ifield_version_map_. 
+ uint16_t memory_version = start_version; + auto version_it = aliasing_ifield_version_map_.find(start_version); + if (version_it != aliasing_ifield_version_map_.end()) { + memory_version = version_it->second; + } else { + // Just use the start_version. + } + res = LookupValue(kAliasingIFieldOp, base, field_id, memory_version); + } + } + if (opcode == Instruction::IGET_WIDE) { + SetOperandValueWide(mir->ssa_rep->defs[0], res); + } else { + SetOperandValue(mir->ssa_rep->defs[0], res); + } + return res; +} + +void LocalValueNumbering::HandleIPut(MIR* mir, uint16_t opcode) { + uint16_t type = opcode - Instruction::IPUT; + int base_reg = (opcode == Instruction::IPUT_WIDE) ? 2 : 1; + uint16_t base = GetOperandValue(mir->ssa_rep->uses[base_reg]); + HandleNullCheck(mir, base); + const MirFieldInfo& field_info = cu_->mir_graph->GetIFieldLoweringInfo(mir); + if (!field_info.IsResolved()) { + // Unresolved fields always alias with everything of the same type. + // Use mir->offset as modifier; without elaborate inlining, it will be unique. + unresolved_ifield_version_[type] = + LookupValue(kUnresolvedIFieldOp, kNoValue, kNoValue, mir->offset); + + // Treat fields of escaped references of the same type as potentially modified. + NonAliasingIFieldKey key = { type, 0u, 0u }; // lowest possible key of this type. + auto it = non_aliasing_ifields_.lower_bound(key), end = non_aliasing_ifields_.end(); + while (it != end && it->type == type) { + it = non_aliasing_ifields_.erase(it); + } + } else if (field_info.IsVolatile()) { + // Nothing to do, resolved volatile fields always get a new memory version anyway and + // can't alias with resolved non-volatile fields. + } else { + uint16_t field_id = GetFieldId(field_info); + uint16_t value = (opcode == Instruction::IPUT_WIDE) + ? GetOperandValueWide(mir->ssa_rep->uses[0]) + : GetOperandValue(mir->ssa_rep->uses[0]); + if (IsNonAliasing(base)) { + StoreValue(kNonAliasingIFieldOp, base, field_id, type, value); + } else { + // Get the start version that accounts for aliasing with unresolved fields of the same type + // and make it unique for the field by including the field_id. + uint16_t start_version = LookupValue(kAliasingIFieldStartVersionOp, global_memory_version_, + unresolved_ifield_version_[type], field_id); + // Find the old version from the aliasing_ifield_version_map_. + uint16_t old_version = start_version; + auto version_it = aliasing_ifield_version_map_.find(start_version); + if (version_it != aliasing_ifield_version_map_.end()) { + old_version = version_it->second; + } + // Check if the field currently contains the value, making this a NOP. + if (HasValue(kAliasingIFieldOp, base, field_id, old_version, value)) { + // This IPUT can be eliminated, it stores the same value that's already in the field. + // TODO: Eliminate the IPUT. + return; + } + // Bump the version, adding to the chain started by start_version. + uint16_t memory_version = LookupValue(kAliasingIFieldBumpVersionOp, old_version, base, value); + // Update the aliasing_ifield_version_map_ so that HandleIGet() can get the memory_version + // without knowing the values used to build the chain. + aliasing_ifield_version_map_.Overwrite(start_version, memory_version); + StoreValue(kAliasingIFieldOp, base, field_id, memory_version, value); + + // Clear non-aliasing fields for this field_id. 
+ NonAliasingIFieldKey field_key = { type, field_id, 0u }; + auto it = non_aliasing_ifields_.lower_bound(field_key), end = non_aliasing_ifields_.end(); + while (it != end && it->field_id == field_id) { + DCHECK_EQ(type, it->type); + it = non_aliasing_ifields_.erase(it); + } + } + } +} + +uint16_t LocalValueNumbering::HandleSGet(MIR* mir, uint16_t opcode) { + const MirFieldInfo& field_info = cu_->mir_graph->GetSFieldLoweringInfo(mir); + uint16_t res; + if (!field_info.IsResolved() || field_info.IsVolatile()) { + // Volatile fields always get a new memory version; field id is irrelevant. + // Unresolved fields may be volatile, so handle them as such to be safe. + // Use result s_reg - will be unique. + res = LookupValue(kNoValue, mir->ssa_rep->defs[0], kNoValue, kNoValue); + } else { + uint16_t field_id = GetFieldId(field_info); + // Resolved non-volatile static fields can alias with non-resolved fields of the same type, + // so we need to use unresolved_sfield_version_[type] in addition to global_memory_version_ + // to determine the version of the field. + uint16_t type = opcode - Instruction::SGET; + res = LookupValue(kResolvedSFieldOp, field_id, + unresolved_sfield_version_[type], global_memory_version_); + } + if (opcode == Instruction::SGET_WIDE) { + SetOperandValueWide(mir->ssa_rep->defs[0], res); + } else { + SetOperandValue(mir->ssa_rep->defs[0], res); + } + return res; +} + +void LocalValueNumbering::HandleSPut(MIR* mir, uint16_t opcode) { + uint16_t type = opcode - Instruction::SPUT; + const MirFieldInfo& field_info = cu_->mir_graph->GetSFieldLoweringInfo(mir); + if (!field_info.IsResolved()) { + // Unresolved fields always alias with everything of the same type. + // Use mir->offset as modifier; without elaborate inlining, it will be unique. + unresolved_sfield_version_[type] = + LookupValue(kUnresolvedSFieldOp, kNoValue, kNoValue, mir->offset); + } else if (field_info.IsVolatile()) { + // Nothing to do, resolved volatile fields always get a new memory version anyway and + // can't alias with resolved non-volatile fields. + } else { + uint16_t field_id = GetFieldId(field_info); + uint16_t value = (opcode == Instruction::SPUT_WIDE) + ? GetOperandValueWide(mir->ssa_rep->uses[0]) + : GetOperandValue(mir->ssa_rep->uses[0]); + // Resolved non-volatile static fields can alias with non-resolved fields of the same type, + // so we need to use unresolved_sfield_version_[type] in addition to global_memory_version_ + // to determine the version of the field. + uint16_t type = opcode - Instruction::SGET; + StoreValue(kResolvedSFieldOp, field_id, + unresolved_sfield_version_[type], global_memory_version_, value); + } } uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { - uint16_t res = NO_VALUE; + uint16_t res = kNoValue; uint16_t opcode = mir->dalvikInsn.opcode; switch (opcode) { case Instruction::NOP: @@ -176,9 +449,14 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { // Nothing defined but the result will be unique and non-null. if (mir->next != nullptr && mir->next->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) { MarkNonAliasingNonNull(mir->next); + // TUNING: We could track value names stored in the array. // The MOVE_RESULT_OBJECT will be processed next and we'll return the value name then. } - MakeArgsAliasing(mir); + // All args escaped (if references). 
+ for (size_t i = 0u, count = mir->ssa_rep->num_uses; i != count; ++i) { + uint16_t reg = GetOperandValue(mir->ssa_rep->uses[i]); + HandleEscapingRef(reg); + } break; case Instruction::INVOKE_DIRECT: @@ -197,8 +475,17 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::INVOKE_STATIC: case Instruction::INVOKE_STATIC_RANGE: if ((mir->optimization_flags & MIR_INLINED) == 0) { - AdvanceGlobalMemory(); - MakeArgsAliasing(mir); + // Use mir->offset as modifier; without elaborate inlining, it will be unique. + global_memory_version_ = LookupValue(kInvokeMemoryVersionBumpOp, 0u, 0u, mir->offset); + // Make ref args aliasing. + for (size_t i = 0u, count = mir->ssa_rep->num_uses; i != count; ++i) { + uint16_t reg = GetOperandValue(mir->ssa_rep->uses[i]); + non_aliasing_refs_.erase(reg); + } + // All fields of escaped references need to be treated as potentially modified. + non_aliasing_ifields_.clear(); + // Array elements may also have been modified via escaped array refs. + escaped_array_refs_.clear(); } break; @@ -211,13 +498,24 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { break; case Instruction::MOVE_EXCEPTION: case Instruction::NEW_INSTANCE: - case Instruction::CONST_STRING: - case Instruction::CONST_STRING_JUMBO: case Instruction::CONST_CLASS: case Instruction::NEW_ARRAY: // 1 result, treat as unique each time, use result s_reg - will be unique. res = MarkNonAliasingNonNull(mir); break; + case Instruction::CONST_STRING: + case Instruction::CONST_STRING_JUMBO: + // These strings are internalized, so assign value based on the string pool index. + res = LookupValue(Instruction::CONST_STRING, Low16Bits(mir->dalvikInsn.vB), + High16Bits(mir->dalvikInsn.vB), 0); + SetOperandValue(mir->ssa_rep->defs[0], res); + null_checked_.insert(res); // May already be there. + // NOTE: Hacking the contents of an internalized string via reflection is possible + // but the behavior is undefined. Therefore, we consider the string constant and + // the reference non-aliasing. + // TUNING: We could keep this property even if the reference "escapes". + non_aliasing_refs_.insert(res); // May already be there. + break; case Instruction::MOVE_RESULT_WIDE: // 1 wide result, treat as unique each time, use result s_reg - will be unique. 
res = GetOperandValueWide(mir->ssa_rep->defs[0]); @@ -255,7 +553,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::CONST_4: case Instruction::CONST_16: res = LookupValue(Instruction::CONST, Low16Bits(mir->dalvikInsn.vB), - High16Bits(mir->dalvikInsn.vB >> 16), 0); + High16Bits(mir->dalvikInsn.vB), 0); SetOperandValue(mir->ssa_rep->defs[0], res); break; @@ -310,7 +608,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::FLOAT_TO_INT: { // res = op + 1 operand uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]); - res = LookupValue(opcode, operand1, NO_VALUE, NO_VALUE); + res = LookupValue(opcode, operand1, kNoValue, kNoValue); SetOperandValue(mir->ssa_rep->defs[0], res); } break; @@ -320,8 +618,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::DOUBLE_TO_FLOAT: case Instruction::DOUBLE_TO_INT: { // res = op + 1 wide operand - uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]); - res = LookupValue(opcode, operand1, NO_VALUE, NO_VALUE); + uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]); + res = LookupValue(opcode, operand1, kNoValue, kNoValue); SetOperandValue(mir->ssa_rep->defs[0], res); } break; @@ -334,7 +632,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::NEG_DOUBLE: { // wide res = op + 1 wide operand uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]); - res = LookupValue(opcode, operand1, NO_VALUE, NO_VALUE); + res = LookupValue(opcode, operand1, kNoValue, kNoValue); SetOperandValueWide(mir->ssa_rep->defs[0], res); } break; @@ -344,8 +642,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::INT_TO_DOUBLE: case Instruction::INT_TO_LONG: { // wide res = op + 1 operand - uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]); - res = LookupValue(opcode, operand1, NO_VALUE, NO_VALUE); + uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]); + res = LookupValue(opcode, operand1, kNoValue, kNoValue); SetOperandValueWide(mir->ssa_rep->defs[0], res); } break; @@ -356,7 +654,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { // res = op + 2 wide operands uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]); uint16_t operand2 = GetOperandValueWide(mir->ssa_rep->uses[2]); - res = LookupValue(opcode, operand1, operand2, NO_VALUE); + res = LookupValue(opcode, operand1, operand2, kNoValue); SetOperandValue(mir->ssa_rep->defs[0], res); } break; @@ -388,7 +686,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { // res = op + 2 operands uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]); uint16_t operand2 = GetOperandValue(mir->ssa_rep->uses[1]); - res = LookupValue(opcode, operand1, operand2, NO_VALUE); + res = LookupValue(opcode, operand1, operand2, kNoValue); SetOperandValue(mir->ssa_rep->defs[0], res); } break; @@ -422,7 +720,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { // wide res = op + 2 wide operands uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]); uint16_t operand2 = GetOperandValueWide(mir->ssa_rep->uses[2]); - res = LookupValue(opcode, operand1, operand2, NO_VALUE); + res = LookupValue(opcode, operand1, operand2, kNoValue); SetOperandValueWide(mir->ssa_rep->defs[0], res); } break; @@ -435,8 +733,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::USHR_LONG_2ADDR: { // wide res = op + 1 wide operand + 1 operand uint16_t operand1 = GetOperandValueWide(mir->ssa_rep->uses[0]); - uint16_t operand2 = 
GetOperandValueWide(mir->ssa_rep->uses[2]); - res = LookupValue(opcode, operand1, operand2, NO_VALUE); + uint16_t operand2 = GetOperandValue(mir->ssa_rep->uses[2]); + res = LookupValue(opcode, operand1, operand2, kNoValue); SetOperandValueWide(mir->ssa_rep->defs[0], res); } break; @@ -454,7 +752,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { // res = op + 2 operands uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]); uint16_t operand2 = GetOperandValue(mir->ssa_rep->uses[1]); - res = LookupValue(opcode, operand1, operand2, NO_VALUE); + res = LookupValue(opcode, operand1, operand2, kNoValue); SetOperandValue(mir->ssa_rep->defs[0], res); } break; @@ -481,7 +779,7 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { // Same as res = op + 2 operands, except use vC as operand 2 uint16_t operand1 = GetOperandValue(mir->ssa_rep->uses[0]); uint16_t operand2 = LookupValue(Instruction::CONST, mir->dalvikInsn.vC, 0, 0); - res = LookupValue(opcode, operand1, operand2, NO_VALUE); + res = LookupValue(opcode, operand1, operand2, kNoValue); SetOperandValue(mir->ssa_rep->defs[0], res); } break; @@ -492,21 +790,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::AGET_BOOLEAN: case Instruction::AGET_BYTE: case Instruction::AGET_CHAR: - case Instruction::AGET_SHORT: { - uint16_t type = opcode - Instruction::AGET; - uint16_t array = GetOperandValue(mir->ssa_rep->uses[0]); - HandleNullCheck(mir, array); - uint16_t index = GetOperandValue(mir->ssa_rep->uses[1]); - HandleRangeCheck(mir, array, index); - // Establish value number for loaded register. Note use of memory version. - uint16_t memory_version = GetMemoryVersion(array, NO_VALUE, type); - uint16_t res = LookupValue(ARRAY_REF, array, index, memory_version); - if (opcode == Instruction::AGET_WIDE) { - SetOperandValueWide(mir->ssa_rep->defs[0], res); - } else { - SetOperandValue(mir->ssa_rep->defs[0], res); - } - } + case Instruction::AGET_SHORT: + res = HandleAGet(mir, opcode); break; case Instruction::APUT_OBJECT: @@ -517,17 +802,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::APUT_BYTE: case Instruction::APUT_BOOLEAN: case Instruction::APUT_SHORT: - case Instruction::APUT_CHAR: { - uint16_t type = opcode - Instruction::APUT; - int array_idx = (opcode == Instruction::APUT_WIDE) ? 2 : 1; - int index_idx = array_idx + 1; - uint16_t array = GetOperandValue(mir->ssa_rep->uses[array_idx]); - HandleNullCheck(mir, array); - uint16_t index = GetOperandValue(mir->ssa_rep->uses[index_idx]); - HandleRangeCheck(mir, array, index); - // Rev the memory version - AdvanceMemoryVersion(array, NO_VALUE, type); - } + case Instruction::APUT_CHAR: + HandleAPut(mir, opcode); break; case Instruction::IGET_OBJECT: @@ -536,33 +812,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::IGET_BOOLEAN: case Instruction::IGET_BYTE: case Instruction::IGET_CHAR: - case Instruction::IGET_SHORT: { - uint16_t type = opcode - Instruction::IGET; - uint16_t base = GetOperandValue(mir->ssa_rep->uses[0]); - HandleNullCheck(mir, base); - const MirFieldInfo& field_info = cu_->mir_graph->GetIFieldLoweringInfo(mir); - uint16_t memory_version; - uint16_t field_id; - if (!field_info.IsResolved() || field_info.IsVolatile()) { - // Volatile fields always get a new memory version; field id is irrelevant. - // Unresolved fields may be volatile, so handle them as such to be safe. 
- field_id = 0u; - memory_version = next_memory_version_; - ++next_memory_version_; - } else { - DCHECK(field_info.IsResolved()); - field_id = GetFieldId(field_info.DeclaringDexFile(), field_info.DeclaringFieldIndex()); - memory_version = std::max(unresolved_ifield_version_[type], - GetMemoryVersion(base, field_id, type)); - } - if (opcode == Instruction::IGET_WIDE) { - res = LookupValue(Instruction::IGET_WIDE, base, field_id, memory_version); - SetOperandValueWide(mir->ssa_rep->defs[0], res); - } else { - res = LookupValue(Instruction::IGET, base, field_id, memory_version); - SetOperandValue(mir->ssa_rep->defs[0], res); - } - } + case Instruction::IGET_SHORT: + res = HandleIGet(mir, opcode); break; case Instruction::IPUT_OBJECT: @@ -573,24 +824,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::IPUT_BOOLEAN: case Instruction::IPUT_BYTE: case Instruction::IPUT_CHAR: - case Instruction::IPUT_SHORT: { - uint16_t type = opcode - Instruction::IPUT; - int base_reg = (opcode == Instruction::IPUT_WIDE) ? 2 : 1; - uint16_t base = GetOperandValue(mir->ssa_rep->uses[base_reg]); - HandleNullCheck(mir, base); - const MirFieldInfo& field_info = cu_->mir_graph->GetIFieldLoweringInfo(mir); - if (!field_info.IsResolved()) { - // Unresolved fields always alias with everything of the same type. - unresolved_ifield_version_[type] = next_memory_version_; - ++next_memory_version_; - } else if (field_info.IsVolatile()) { - // Nothing to do, resolved volatile fields always get a new memory version anyway and - // can't alias with resolved non-volatile fields. - } else { - AdvanceMemoryVersion(base, GetFieldId(field_info.DeclaringDexFile(), - field_info.DeclaringFieldIndex()), type); - } - } + case Instruction::IPUT_SHORT: + HandleIPut(mir, opcode); break; case Instruction::SGET_OBJECT: @@ -599,31 +834,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::SGET_BOOLEAN: case Instruction::SGET_BYTE: case Instruction::SGET_CHAR: - case Instruction::SGET_SHORT: { - uint16_t type = opcode - Instruction::SGET; - const MirFieldInfo& field_info = cu_->mir_graph->GetSFieldLoweringInfo(mir); - uint16_t memory_version; - uint16_t field_id; - if (!field_info.IsResolved() || field_info.IsVolatile()) { - // Volatile fields always get a new memory version; field id is irrelevant. - // Unresolved fields may be volatile, so handle them as such to be safe. 
- field_id = 0u; - memory_version = next_memory_version_; - ++next_memory_version_; - } else { - DCHECK(field_info.IsResolved()); - field_id = GetFieldId(field_info.DeclaringDexFile(), field_info.DeclaringFieldIndex()); - memory_version = std::max(unresolved_sfield_version_[type], - GetMemoryVersion(NO_VALUE, field_id, type)); - } - if (opcode == Instruction::SGET_WIDE) { - res = LookupValue(Instruction::SGET_WIDE, NO_VALUE, field_id, memory_version); - SetOperandValueWide(mir->ssa_rep->defs[0], res); - } else { - res = LookupValue(Instruction::SGET, NO_VALUE, field_id, memory_version); - SetOperandValue(mir->ssa_rep->defs[0], res); - } - } + case Instruction::SGET_SHORT: + res = HandleSGet(mir, opcode); break; case Instruction::SPUT_OBJECT: @@ -634,21 +846,8 @@ uint16_t LocalValueNumbering::GetValueNumber(MIR* mir) { case Instruction::SPUT_BOOLEAN: case Instruction::SPUT_BYTE: case Instruction::SPUT_CHAR: - case Instruction::SPUT_SHORT: { - uint16_t type = opcode - Instruction::SPUT; - const MirFieldInfo& field_info = cu_->mir_graph->GetSFieldLoweringInfo(mir); - if (!field_info.IsResolved()) { - // Unresolved fields always alias with everything of the same type. - unresolved_sfield_version_[type] = next_memory_version_; - ++next_memory_version_; - } else if (field_info.IsVolatile()) { - // Nothing to do, resolved volatile fields always get a new memory version anyway and - // can't alias with resolved non-volatile fields. - } else { - AdvanceMemoryVersion(NO_VALUE, GetFieldId(field_info.DeclaringDexFile(), - field_info.DeclaringFieldIndex()), type); - } - } + case Instruction::SPUT_SHORT: + HandleSPut(mir, opcode); break; } return res; diff --git a/compiler/dex/local_value_numbering.h b/compiler/dex/local_value_numbering.h index 0c2b6a7e01..2a815be1cc 100644 --- a/compiler/dex/local_value_numbering.h +++ b/compiler/dex/local_value_numbering.h @@ -23,15 +23,33 @@ #include "utils/scoped_arena_allocator.h" #include "utils/scoped_arena_containers.h" -#define NO_VALUE 0xffff -#define ARRAY_REF 0xfffe - namespace art { class DexFile; +class MirFieldInfo; class LocalValueNumbering { + public: + LocalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator); + + uint16_t GetValueNumber(MIR* mir); + + // LocalValueNumbering should be allocated on the ArenaStack (or the native stack). + static void* operator new(size_t size, ScopedArenaAllocator* allocator) { + return allocator->Alloc(sizeof(LocalValueNumbering), kArenaAllocMIR); + } + + // Allow delete-expression to destroy a LocalValueNumbering object without deallocation. + static void operator delete(void* ptr) { UNUSED(ptr); } + + // Checks that the value names didn't overflow. + bool Good() const { + return last_value_ < kNoValue; + } + private: + static constexpr uint16_t kNoValue = 0xffffu; + // Field types correspond to the ordering of GET/PUT instructions; this order is the same // for IGET, IPUT, SGET, SPUT, AGET and APUT: // op 0 @@ -43,7 +61,7 @@ class LocalValueNumbering { // op_SHORT 6 static constexpr size_t kFieldTypeCount = 7; - // FieldReference represents either a unique resolved field or all unresolved fields together. + // FieldReference represents a unique resolved field. struct FieldReference { const DexFile* dex_file; uint16_t field_idx; @@ -58,48 +76,107 @@ class LocalValueNumbering { } }; - struct MemoryVersionKey { + // Maps field key to field id for resolved fields. 
+ typedef ScopedArenaSafeMap<FieldReference, uint32_t, FieldReferenceComparator> FieldIndexMap; + + struct RangeCheckKey { + uint16_t array; + uint16_t index; + }; + + struct RangeCheckKeyComparator { + bool operator()(const RangeCheckKey& lhs, const RangeCheckKey& rhs) const { + if (lhs.array != rhs.array) { + return lhs.array < rhs.array; + } + return lhs.index < rhs.index; + } + }; + + typedef ScopedArenaSet<RangeCheckKey, RangeCheckKeyComparator> RangeCheckSet; + + typedef ScopedArenaSafeMap<uint16_t, uint16_t> AliasingIFieldVersionMap; + typedef ScopedArenaSafeMap<uint16_t, uint16_t> NonAliasingArrayVersionMap; + + struct NonAliasingIFieldKey { uint16_t base; uint16_t field_id; uint16_t type; }; - struct MemoryVersionKeyComparator { - bool operator()(const MemoryVersionKey& lhs, const MemoryVersionKey& rhs) const { - if (lhs.base != rhs.base) { - return lhs.base < rhs.base; + struct NonAliasingIFieldKeyComparator { + bool operator()(const NonAliasingIFieldKey& lhs, const NonAliasingIFieldKey& rhs) const { + // Compare the type first. This allows iterating across all the entries for a certain type + // as needed when we need to purge them for an unresolved field IPUT. + if (lhs.type != rhs.type) { + return lhs.type < rhs.type; } + // Compare the field second. This allows iterating across all the entries for a certain + // field as needed when we need to purge them for an aliasing field IPUT. if (lhs.field_id != rhs.field_id) { return lhs.field_id < rhs.field_id; } - return lhs.type < rhs.type; + // Compare the base last. + return lhs.base < rhs.base; } }; + // Set of instance fields still holding non-aliased values after the base has been stored. + typedef ScopedArenaSet<NonAliasingIFieldKey, NonAliasingIFieldKeyComparator> NonAliasingFieldSet; + + struct EscapedArrayKey { + uint16_t base; + uint16_t type; + }; + + struct EscapedArrayKeyComparator { + bool operator()(const EscapedArrayKey& lhs, const EscapedArrayKey& rhs) const { + // Compare the type first. This allows iterating across all the entries for a certain type + // as needed when we need to purge them for an unresolved field APUT. + if (lhs.type != rhs.type) { + return lhs.type < rhs.type; + } + // Compare the base last. + return lhs.base < rhs.base; + } + }; + + // Set of previously non-aliasing array refs that escaped. + typedef ScopedArenaSet<EscapedArrayKey, EscapedArrayKeyComparator> EscapedArraySet; + // Key is s_reg, value is value name. typedef ScopedArenaSafeMap<uint16_t, uint16_t> SregValueMap; // Key is concatenation of opcode, operand1, operand2 and modifier, value is value name. typedef ScopedArenaSafeMap<uint64_t, uint16_t> ValueMap; // Key represents a memory address, value is generation. - typedef ScopedArenaSafeMap<MemoryVersionKey, uint16_t, MemoryVersionKeyComparator - > MemoryVersionMap; - // Maps field key to field id for resolved fields. - typedef ScopedArenaSafeMap<FieldReference, uint32_t, FieldReferenceComparator> FieldIndexMap; // A set of value names. 
typedef ScopedArenaSet<uint16_t> ValueNameSet; - public: - static LocalValueNumbering* Create(CompilationUnit* cu) { - std::unique_ptr<ScopedArenaAllocator> allocator(ScopedArenaAllocator::Create(&cu->arena_stack)); - void* addr = allocator->Alloc(sizeof(LocalValueNumbering), kArenaAllocMisc); - return new(addr) LocalValueNumbering(cu, allocator.release()); - } - static uint64_t BuildKey(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) { return (static_cast<uint64_t>(op) << 48 | static_cast<uint64_t>(operand1) << 32 | static_cast<uint64_t>(operand2) << 16 | static_cast<uint64_t>(modifier)); }; + static uint16_t ExtractOp(uint64_t key) { + return static_cast<uint16_t>(key >> 48); + } + + static uint16_t ExtractOperand1(uint64_t key) { + return static_cast<uint16_t>(key >> 32); + } + + static uint16_t ExtractOperand2(uint64_t key) { + return static_cast<uint16_t>(key >> 16); + } + + static uint16_t ExtractModifier(uint64_t key) { + return static_cast<uint16_t>(key); + } + + static bool EqualOpAndOperand1(uint64_t key1, uint64_t key2) { + return static_cast<uint32_t>(key1 >> 32) == static_cast<uint32_t>(key2 >> 32); + } + uint16_t LookupValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) { uint16_t res; uint64_t key = BuildKey(op, operand1, operand2, modifier); @@ -107,12 +184,26 @@ class LocalValueNumbering { if (it != value_map_.end()) { res = it->second; } else { - res = value_map_.size() + 1; + ++last_value_; + res = last_value_; value_map_.Put(key, res); } return res; }; + void StoreValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier, + uint16_t value) { + uint64_t key = BuildKey(op, operand1, operand2, modifier); + value_map_.Overwrite(key, value); + } + + bool HasValue(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier, + uint16_t value) const { + uint64_t key = BuildKey(op, operand1, operand2, modifier); + ValueMap::const_iterator it = value_map_.find(key); + return (it != value_map_.end() && it->second == value); + }; + bool ValueExists(uint16_t op, uint16_t operand1, uint16_t operand2, uint16_t modifier) const { uint64_t key = BuildKey(op, operand1, operand2, modifier); ValueMap::const_iterator it = value_map_.find(key); @@ -129,13 +220,13 @@ class LocalValueNumbering { }; uint16_t GetOperandValue(int s_reg) { - uint16_t res = NO_VALUE; + uint16_t res = kNoValue; SregValueMap::iterator it = sreg_value_map_.find(s_reg); if (it != sreg_value_map_.end()) { res = it->second; } else { // First use - res = LookupValue(NO_VALUE, s_reg, NO_VALUE, NO_VALUE); + res = LookupValue(kNoValue, s_reg, kNoValue, kNoValue); sreg_value_map_.Put(s_reg, res); } return res; @@ -151,63 +242,61 @@ class LocalValueNumbering { }; uint16_t GetOperandValueWide(int s_reg) { - uint16_t res = NO_VALUE; + uint16_t res = kNoValue; SregValueMap::iterator it = sreg_wide_value_map_.find(s_reg); if (it != sreg_wide_value_map_.end()) { res = it->second; } else { // First use - res = LookupValue(NO_VALUE, s_reg, NO_VALUE, NO_VALUE); + res = LookupValue(kNoValue, s_reg, kNoValue, kNoValue); sreg_wide_value_map_.Put(s_reg, res); } return res; }; - uint16_t GetValueNumber(MIR* mir); - - // Allow delete-expression to destroy a LocalValueNumbering object without deallocation. 
- static void operator delete(void* ptr) { UNUSED(ptr); } - - private: - LocalValueNumbering(CompilationUnit* cu, ScopedArenaAllocator* allocator) - : cu_(cu), - allocator_(allocator), - sreg_value_map_(std::less<uint16_t>(), allocator->Adapter()), - sreg_wide_value_map_(std::less<uint16_t>(), allocator->Adapter()), - value_map_(std::less<uint64_t>(), allocator->Adapter()), - next_memory_version_(1u), - global_memory_version_(0u), - memory_version_map_(MemoryVersionKeyComparator(), allocator->Adapter()), - field_index_map_(FieldReferenceComparator(), allocator->Adapter()), - non_aliasing_refs_(std::less<uint16_t>(), allocator->Adapter()), - null_checked_(std::less<uint16_t>(), allocator->Adapter()) { - std::fill_n(unresolved_sfield_version_, kFieldTypeCount, 0u); - std::fill_n(unresolved_ifield_version_, kFieldTypeCount, 0u); - } - - uint16_t GetFieldId(const DexFile* dex_file, uint16_t field_idx); - void AdvanceGlobalMemory(); - uint16_t GetMemoryVersion(uint16_t base, uint16_t field, uint16_t type); - uint16_t AdvanceMemoryVersion(uint16_t base, uint16_t field, uint16_t type); + uint16_t GetFieldId(const MirFieldInfo& field_info); uint16_t MarkNonAliasingNonNull(MIR* mir); - void MakeArgsAliasing(MIR* mir); + bool IsNonAliasing(uint16_t reg); + bool IsNonAliasingIField(uint16_t reg, uint16_t field_id, uint16_t type); + bool IsNonAliasingArray(uint16_t reg, uint16_t type); void HandleNullCheck(MIR* mir, uint16_t reg); void HandleRangeCheck(MIR* mir, uint16_t array, uint16_t index); void HandlePutObject(MIR* mir); + void HandleEscapingRef(uint16_t base); + uint16_t HandleAGet(MIR* mir, uint16_t opcode); + void HandleAPut(MIR* mir, uint16_t opcode); + uint16_t HandleIGet(MIR* mir, uint16_t opcode); + void HandleIPut(MIR* mir, uint16_t opcode); + uint16_t HandleSGet(MIR* mir, uint16_t opcode); + void HandleSPut(MIR* mir, uint16_t opcode); CompilationUnit* const cu_; - std::unique_ptr<ScopedArenaAllocator> allocator_; + + // We have 32-bit last_value_ so that we can detect when we run out of value names, see Good(). + // We usually don't check Good() until the end of LVN unless we're about to modify code. + uint32_t last_value_; + SregValueMap sreg_value_map_; SregValueMap sreg_wide_value_map_; ValueMap value_map_; - uint16_t next_memory_version_; + + // Data for dealing with memory clobbering and store/load aliasing. uint16_t global_memory_version_; uint16_t unresolved_sfield_version_[kFieldTypeCount]; uint16_t unresolved_ifield_version_[kFieldTypeCount]; - MemoryVersionMap memory_version_map_; + uint16_t aliasing_array_version_[kFieldTypeCount]; + AliasingIFieldVersionMap aliasing_ifield_version_map_; + NonAliasingArrayVersionMap non_aliasing_array_version_map_; FieldIndexMap field_index_map_; // Value names of references to objects that cannot be reached through a different value name. ValueNameSet non_aliasing_refs_; + // Instance fields still holding non-aliased values after the base has escaped. + NonAliasingFieldSet non_aliasing_ifields_; + // Previously non-aliasing array refs that escaped but can still be used for non-aliasing AGET. + EscapedArraySet escaped_array_refs_; + + // Range check and null check elimination. 
+ RangeCheckSet range_checked_; ValueNameSet null_checked_; DISALLOW_COPY_AND_ASSIGN(LocalValueNumbering); diff --git a/compiler/dex/local_value_numbering_test.cc b/compiler/dex/local_value_numbering_test.cc index e56e0160ca..efc4fc8a34 100644 --- a/compiler/dex/local_value_numbering_test.cc +++ b/compiler/dex/local_value_numbering_test.cc @@ -40,7 +40,7 @@ class LocalValueNumberingTest : public testing::Test { struct MIRDef { static constexpr size_t kMaxSsaDefs = 2; - static constexpr size_t kMaxSsaUses = 3; + static constexpr size_t kMaxSsaUses = 4; Instruction::Code opcode; int64_t value; @@ -55,6 +55,8 @@ class LocalValueNumberingTest : public testing::Test { { opcode, value, 0u, 0, { }, 1, { reg } } #define DEF_CONST_WIDE(opcode, reg, value) \ { opcode, value, 0u, 0, { }, 2, { reg, reg + 1 } } +#define DEF_CONST_STRING(opcode, reg, index) \ + { opcode, index, 0u, 0, { }, 1, { reg } } #define DEF_IGET(opcode, reg, obj, field_info) \ { opcode, 0u, field_info, 1, { obj }, 1, { reg } } #define DEF_IGET_WIDE(opcode, reg, obj, field_info) \ @@ -71,6 +73,14 @@ class LocalValueNumberingTest : public testing::Test { { opcode, 0u, field_info, 1, { reg }, 0, { } } #define DEF_SPUT_WIDE(opcode, reg, field_info) \ { opcode, 0u, field_info, 2, { reg, reg + 1 }, 0, { } } +#define DEF_AGET(opcode, reg, obj, idx) \ + { opcode, 0u, 0u, 2, { obj, idx }, 1, { reg } } +#define DEF_AGET_WIDE(opcode, reg, obj, idx) \ + { opcode, 0u, 0u, 2, { obj, idx }, 2, { reg, reg + 1 } } +#define DEF_APUT(opcode, reg, obj, idx) \ + { opcode, 0u, 0u, 3, { reg, obj, idx }, 0, { } } +#define DEF_APUT_WIDE(opcode, reg, obj, idx) \ + { opcode, 0u, 0u, 4, { reg, reg + 1, obj, idx }, 0, { } } #define DEF_INVOKE1(opcode, reg) \ { opcode, 0u, 0u, 1, { reg }, 0, { } } #define DEF_UNIQUE_REF(opcode, reg) \ @@ -163,6 +173,7 @@ class LocalValueNumberingTest : public testing::Test { for (size_t i = 0; i != mir_count_; ++i) { value_names_[i] = lvn_->GetValueNumber(&mirs_[i]); } + EXPECT_TRUE(lvn_->Good()); } LocalValueNumberingTest() @@ -170,8 +181,11 @@ class LocalValueNumberingTest : public testing::Test { cu_(&pool_), mir_count_(0u), mirs_(nullptr), - lvn_(LocalValueNumbering::Create(&cu_)) { + allocator_(), + lvn_() { cu_.mir_graph.reset(new MIRGraph(&cu_, &cu_.arena)); + allocator_.reset(ScopedArenaAllocator::Create(&cu_.arena_stack)); + lvn_.reset(new (allocator_.get()) LocalValueNumbering(&cu_, allocator_.get())); } ArenaPool pool_; @@ -180,12 +194,13 @@ class LocalValueNumberingTest : public testing::Test { MIR* mirs_; std::vector<SSARepresentation> ssa_reps_; std::vector<uint16_t> value_names_; + std::unique_ptr<ScopedArenaAllocator> allocator_; std::unique_ptr<LocalValueNumbering> lvn_; }; -TEST_F(LocalValueNumberingTest, TestIGetIGetInvokeIGet) { +TEST_F(LocalValueNumberingTest, IGetIGetInvokeIGet) { static const IFieldDef ifields[] = { - { 1u, 1u, 1u, false } + { 1u, 1u, 1u, false }, }; static const MIRDef mirs[] = { DEF_IGET(Instruction::IGET, 0u, 10u, 0u), @@ -206,15 +221,15 @@ TEST_F(LocalValueNumberingTest, TestIGetIGetInvokeIGet) { EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK); } -TEST_F(LocalValueNumberingTest, TestIGetIPutIGetIGetIGet) { +TEST_F(LocalValueNumberingTest, IGetIPutIGetIGetIGet) { static const IFieldDef ifields[] = { { 1u, 1u, 1u, false }, { 2u, 1u, 2u, false }, }; static const MIRDef mirs[] = { - DEF_IGET(Instruction::IGET, 0u, 10u, 0u), - DEF_IPUT(Instruction::IPUT, 1u, 11u, 0u), // May alias. 
- DEF_IGET(Instruction::IGET, 2u, 10u, 0u), + DEF_IGET(Instruction::IGET_OBJECT, 0u, 10u, 0u), + DEF_IPUT(Instruction::IPUT_OBJECT, 1u, 11u, 0u), // May alias. + DEF_IGET(Instruction::IGET_OBJECT, 2u, 10u, 0u), DEF_IGET(Instruction::IGET, 3u, 0u, 1u), DEF_IGET(Instruction::IGET, 4u, 2u, 1u), }; @@ -232,7 +247,7 @@ TEST_F(LocalValueNumberingTest, TestIGetIPutIGetIGetIGet) { EXPECT_EQ(mirs_[4].optimization_flags, 0u); } -TEST_F(LocalValueNumberingTest, TestUniquePreserve1) { +TEST_F(LocalValueNumberingTest, UniquePreserve1) { static const IFieldDef ifields[] = { { 1u, 1u, 1u, false }, }; @@ -253,7 +268,7 @@ TEST_F(LocalValueNumberingTest, TestUniquePreserve1) { EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK); } -TEST_F(LocalValueNumberingTest, TestUniquePreserve2) { +TEST_F(LocalValueNumberingTest, UniquePreserve2) { static const IFieldDef ifields[] = { { 1u, 1u, 1u, false }, }; @@ -274,7 +289,7 @@ TEST_F(LocalValueNumberingTest, TestUniquePreserve2) { EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK); } -TEST_F(LocalValueNumberingTest, TestUniquePreserveAndEscape) { +TEST_F(LocalValueNumberingTest, UniquePreserveAndEscape) { static const IFieldDef ifields[] = { { 1u, 1u, 1u, false }, }; @@ -298,7 +313,7 @@ TEST_F(LocalValueNumberingTest, TestUniquePreserveAndEscape) { EXPECT_EQ(mirs_[5].optimization_flags, MIR_IGNORE_NULL_CHECK); } -TEST_F(LocalValueNumberingTest, TestVolatile) { +TEST_F(LocalValueNumberingTest, Volatile) { static const IFieldDef ifields[] = { { 1u, 1u, 1u, false }, { 2u, 1u, 2u, true }, @@ -322,4 +337,264 @@ TEST_F(LocalValueNumberingTest, TestVolatile) { EXPECT_EQ(mirs_[3].optimization_flags, 0u); } +TEST_F(LocalValueNumberingTest, UnresolvedIField) { + static const IFieldDef ifields[] = { + { 1u, 1u, 1u, false }, // Resolved field #1. + { 2u, 1u, 2u, false }, // Resolved field #2. + { 3u, 0u, 0u, false }, // Unresolved field. + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 20u), + DEF_IGET(Instruction::IGET, 1u, 20u, 0u), // Resolved field #1, unique object. + DEF_IGET(Instruction::IGET, 2u, 21u, 0u), // Resolved field #1. + DEF_IGET_WIDE(Instruction::IGET_WIDE, 3u, 21u, 1u), // Resolved field #2. + DEF_IGET(Instruction::IGET, 4u, 22u, 2u), // IGET doesn't clobber anything. + DEF_IGET(Instruction::IGET, 5u, 20u, 0u), // Resolved field #1, unique object. + DEF_IGET(Instruction::IGET, 6u, 21u, 0u), // Resolved field #1. + DEF_IGET_WIDE(Instruction::IGET_WIDE, 7u, 21u, 1u), // Resolved field #2. + DEF_IPUT(Instruction::IPUT, 8u, 22u, 2u), // IPUT clobbers field #1 (#2 if wide). + DEF_IGET(Instruction::IGET, 9u, 20u, 0u), // Resolved field #1, unique object. + DEF_IGET(Instruction::IGET, 10u, 21u, 0u), // Resolved field #1, new value name. + DEF_IGET_WIDE(Instruction::IGET_WIDE, 11u, 21u, 1u), // Resolved field #2. + }; + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 12u); + EXPECT_EQ(value_names_[1], value_names_[5]); + EXPECT_EQ(value_names_[2], value_names_[6]); + EXPECT_EQ(value_names_[3], value_names_[7]); + EXPECT_EQ(value_names_[1], value_names_[9]); + EXPECT_NE(value_names_[2], value_names_[10]); // This aliased with unresolved IPUT. 
+ EXPECT_EQ(value_names_[3], value_names_[11]); + EXPECT_EQ(mirs_[0].optimization_flags, 0u); + EXPECT_EQ(mirs_[1].optimization_flags, MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[2].optimization_flags, 0u); + EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[4].optimization_flags, 0u); + for (size_t i = 5u; i != mir_count_; ++i) { + EXPECT_EQ(mirs_[i].optimization_flags, MIR_IGNORE_NULL_CHECK); + } +} + +TEST_F(LocalValueNumberingTest, UnresolvedSField) { + static const SFieldDef sfields[] = { + { 1u, 1u, 1u, false }, // Resolved field #1. + { 2u, 1u, 2u, false }, // Resolved field #2. + { 3u, 0u, 0u, false }, // Unresolved field. + }; + static const MIRDef mirs[] = { + DEF_SGET(Instruction::SGET, 0u, 0u), // Resolved field #1. + DEF_SGET_WIDE(Instruction::SGET_WIDE, 1u, 1u), // Resolved field #2. + DEF_SGET(Instruction::SGET, 2u, 2u), // SGET doesn't clobber anything. + DEF_SGET(Instruction::SGET, 3u, 0u), // Resolved field #1. + DEF_SGET_WIDE(Instruction::SGET_WIDE, 4u, 1u), // Resolved field #2. + DEF_SPUT(Instruction::SPUT, 5u, 2u), // SPUT clobbers field #1 (#2 is wide). + DEF_SGET(Instruction::SGET, 6u, 0u), // Resolved field #1. + DEF_SGET_WIDE(Instruction::SGET_WIDE, 7u, 1u), // Resolved field #2. + }; + + PrepareSFields(sfields); + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 8u); + EXPECT_EQ(value_names_[0], value_names_[3]); + EXPECT_EQ(value_names_[1], value_names_[4]); + EXPECT_NE(value_names_[0], value_names_[6]); // This aliased with unresolved IPUT. + EXPECT_EQ(value_names_[1], value_names_[7]); + for (size_t i = 0u; i != mir_count_; ++i) { + EXPECT_EQ(mirs_[i].optimization_flags, 0u) << i; + } +} + +TEST_F(LocalValueNumberingTest, ConstString) { + static const MIRDef mirs[] = { + DEF_CONST_STRING(Instruction::CONST_STRING, 0u, 0u), + DEF_CONST_STRING(Instruction::CONST_STRING, 1u, 0u), + DEF_CONST_STRING(Instruction::CONST_STRING, 2u, 2u), + DEF_CONST_STRING(Instruction::CONST_STRING, 3u, 0u), + DEF_INVOKE1(Instruction::INVOKE_DIRECT, 2u), + DEF_CONST_STRING(Instruction::CONST_STRING, 4u, 2u), + }; + + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 6u); + EXPECT_EQ(value_names_[1], value_names_[0]); + EXPECT_NE(value_names_[2], value_names_[0]); + EXPECT_EQ(value_names_[3], value_names_[0]); + EXPECT_EQ(value_names_[5], value_names_[2]); +} + +TEST_F(LocalValueNumberingTest, SameValueInDifferentMemoryLocations) { + static const IFieldDef ifields[] = { + { 1u, 1u, 1u, false }, + { 2u, 1u, 2u, false }, + }; + static const SFieldDef sfields[] = { + { 3u, 1u, 3u, false }, + }; + static const MIRDef mirs[] = { + DEF_IGET(Instruction::IGET, 0u, 10u, 0u), + DEF_IPUT(Instruction::IPUT, 0u, 10u, 1u), + DEF_SPUT(Instruction::SPUT, 0u, 0u), + DEF_APUT(Instruction::APUT, 0u, 11u, 12u), + DEF_IGET(Instruction::IGET, 1u, 10u, 0u), + DEF_IGET(Instruction::IGET, 2u, 10u, 1u), + DEF_AGET(Instruction::AGET, 3u, 11u, 12u), + DEF_SGET(Instruction::SGET, 4u, 0u), + }; + + PrepareIFields(ifields); + PrepareSFields(sfields); + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 8u); + EXPECT_EQ(value_names_[4], value_names_[0]); + EXPECT_EQ(value_names_[5], value_names_[0]); + EXPECT_EQ(value_names_[6], value_names_[0]); + EXPECT_EQ(value_names_[7], value_names_[0]); + EXPECT_EQ(mirs_[0].optimization_flags, 0u); + EXPECT_EQ(mirs_[1].optimization_flags, MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[2].optimization_flags, 0u); + EXPECT_EQ(mirs_[3].optimization_flags, 0u); + EXPECT_EQ(mirs_[4].optimization_flags, 
MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[5].optimization_flags, MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[6].optimization_flags, MIR_IGNORE_NULL_CHECK | MIR_IGNORE_RANGE_CHECK); + EXPECT_EQ(mirs_[7].optimization_flags, 0u); +} + +TEST_F(LocalValueNumberingTest, UniqueArrayAliasing) { + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(Instruction::NEW_ARRAY, 20u), + DEF_AGET(Instruction::AGET, 1u, 20u, 40u), + DEF_APUT(Instruction::APUT, 2u, 20u, 41u), // May alias with index for sreg 40u. + DEF_AGET(Instruction::AGET, 3u, 20u, 40u), + }; + + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 4u); + EXPECT_NE(value_names_[1], value_names_[3]); + EXPECT_EQ(mirs_[0].optimization_flags, 0u); + EXPECT_EQ(mirs_[1].optimization_flags, MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[2].optimization_flags, MIR_IGNORE_NULL_CHECK); + EXPECT_EQ(mirs_[3].optimization_flags, MIR_IGNORE_NULL_CHECK | MIR_IGNORE_RANGE_CHECK); +} + +TEST_F(LocalValueNumberingTest, EscapingRefs) { + static const IFieldDef ifields[] = { + { 1u, 1u, 1u, false }, // Field #1. + { 2u, 1u, 2u, false }, // Field #2. + { 3u, 1u, 3u, false }, // Reference field for storing escaping refs. + { 4u, 1u, 4u, false }, // Wide. + { 5u, 0u, 0u, false }, // Unresolved field, int. + { 6u, 0u, 0u, false }, // Unresolved field, wide. + }; + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(Instruction::NEW_INSTANCE, 20u), + DEF_IGET(Instruction::IGET, 1u, 20u, 0u), + DEF_IGET(Instruction::IGET, 2u, 20u, 1u), + DEF_IPUT(Instruction::IPUT_OBJECT, 20u, 30u, 2u), // Ref escapes. + DEF_IGET(Instruction::IGET, 4u, 20u, 0u), + DEF_IGET(Instruction::IGET, 5u, 20u, 1u), + DEF_IPUT(Instruction::IPUT, 6u, 31u, 0u), // May alias with field #1. + DEF_IGET(Instruction::IGET, 7u, 20u, 0u), // New value. + DEF_IGET(Instruction::IGET, 8u, 20u, 1u), // Still the same. + DEF_IPUT_WIDE(Instruction::IPUT_WIDE, 9u, 31u, 3u), // No aliasing, different type. + DEF_IGET(Instruction::IGET, 10u, 20u, 0u), + DEF_IGET(Instruction::IGET, 11u, 20u, 1u), + DEF_IPUT_WIDE(Instruction::IPUT_WIDE, 12u, 31u, 5u), // No aliasing, different type. + DEF_IGET(Instruction::IGET, 13u, 20u, 0u), + DEF_IGET(Instruction::IGET, 14u, 20u, 1u), + DEF_IPUT(Instruction::IPUT, 15u, 31u, 4u), // Aliasing, same type. + DEF_IGET(Instruction::IGET, 16u, 20u, 0u), + DEF_IGET(Instruction::IGET, 17u, 20u, 1u), + }; + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 18u); + EXPECT_EQ(value_names_[1], value_names_[4]); + EXPECT_EQ(value_names_[2], value_names_[5]); + EXPECT_NE(value_names_[4], value_names_[7]); // New value. + EXPECT_EQ(value_names_[5], value_names_[8]); + EXPECT_EQ(value_names_[7], value_names_[10]); + EXPECT_EQ(value_names_[8], value_names_[11]); + EXPECT_EQ(value_names_[10], value_names_[13]); + EXPECT_EQ(value_names_[11], value_names_[14]); + EXPECT_NE(value_names_[13], value_names_[16]); // New value. + EXPECT_NE(value_names_[14], value_names_[17]); // New value. + for (size_t i = 0u; i != mir_count_; ++i) { + int expected = (i != 0u && i != 3u && i != 6u) ? MIR_IGNORE_NULL_CHECK : 0u; + EXPECT_EQ(expected, mirs_[i].optimization_flags) << i; + } +} + +TEST_F(LocalValueNumberingTest, EscapingArrayRefs) { + static const MIRDef mirs[] = { + DEF_UNIQUE_REF(Instruction::NEW_ARRAY, 20u), + DEF_AGET(Instruction::AGET, 1u, 20u, 40u), + DEF_AGET(Instruction::AGET, 2u, 20u, 41u), + DEF_APUT(Instruction::APUT_OBJECT, 20u, 30u, 42u), // Array ref escapes. 
+ DEF_AGET(Instruction::AGET, 4u, 20u, 40u), + DEF_AGET(Instruction::AGET, 5u, 20u, 41u), + DEF_APUT_WIDE(Instruction::APUT_WIDE, 6u, 31u, 43u), // No aliasing, different type. + DEF_AGET(Instruction::AGET, 7u, 20u, 40u), + DEF_AGET(Instruction::AGET, 8u, 20u, 41u), + DEF_APUT(Instruction::APUT, 9u, 32u, 40u), // May alias with all elements. + DEF_AGET(Instruction::AGET, 10u, 20u, 40u), // New value (same index name). + DEF_AGET(Instruction::AGET, 11u, 20u, 41u), // New value (different index name). + }; + + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 12u); + EXPECT_EQ(value_names_[1], value_names_[4]); + EXPECT_EQ(value_names_[2], value_names_[5]); + EXPECT_EQ(value_names_[4], value_names_[7]); + EXPECT_EQ(value_names_[5], value_names_[8]); + EXPECT_NE(value_names_[7], value_names_[10]); // New value. + EXPECT_NE(value_names_[8], value_names_[11]); // New value. + for (size_t i = 0u; i != mir_count_; ++i) { + int expected = + ((i != 0u && i != 3u && i != 6u && i != 9u) ? MIR_IGNORE_NULL_CHECK : 0u) | + ((i >= 4 && i != 6u && i != 9u) ? MIR_IGNORE_RANGE_CHECK : 0u); + EXPECT_EQ(expected, mirs_[i].optimization_flags) << i; + } +} + +TEST_F(LocalValueNumberingTest, StoringSameValueKeepsMemoryVersion) { + static const IFieldDef ifields[] = { + { 1u, 1u, 1u, false }, + }; + static const MIRDef mirs[] = { + DEF_IGET(Instruction::IGET, 0u, 10u, 0u), + DEF_IGET(Instruction::IGET, 1u, 11u, 0u), + DEF_IPUT(Instruction::IPUT, 1u, 11u, 0u), // Store the same value. + DEF_IGET(Instruction::IGET, 3u, 10u, 0u), + DEF_AGET(Instruction::AGET, 4u, 12u, 40u), + DEF_AGET(Instruction::AGET, 5u, 13u, 40u), + DEF_APUT(Instruction::APUT, 5u, 13u, 40u), // Store the same value. + DEF_AGET(Instruction::AGET, 7u, 12u, 40u), + }; + + PrepareIFields(ifields); + PrepareMIRs(mirs); + PerformLVN(); + ASSERT_EQ(value_names_.size(), 8u); + EXPECT_NE(value_names_[0], value_names_[1]); + EXPECT_EQ(value_names_[0], value_names_[3]); + EXPECT_NE(value_names_[4], value_names_[5]); + EXPECT_EQ(value_names_[4], value_names_[7]); + for (size_t i = 0u; i != mir_count_; ++i) { + int expected = + ((i == 2u || i == 3u || i == 6u || i == 7u) ? MIR_IGNORE_NULL_CHECK : 0u) | + ((i == 6u || i == 7u) ? MIR_IGNORE_RANGE_CHECK : 0u); + EXPECT_EQ(expected, mirs_[i].optimization_flags) << i; + } +} + } // namespace art diff --git a/compiler/dex/mir_analysis.cc b/compiler/dex/mir_analysis.cc index 508f1c70bd..7129f8a501 100644 --- a/compiler/dex/mir_analysis.cc +++ b/compiler/dex/mir_analysis.cc @@ -902,7 +902,7 @@ void MIRGraph::AnalyzeBlock(BasicBlock* bb, MethodStats* stats) { while (!done) { tbb->visited = true; for (MIR* mir = tbb->first_mir_insn; mir != NULL; mir = mir->next) { - if (static_cast<uint32_t>(mir->dalvikInsn.opcode) >= kMirOpFirst) { + if (IsPseudoMirOp(mir->dalvikInsn.opcode)) { // Skip any MIR pseudo-op. 
continue; } diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index 47b233b463..9fea709568 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -1015,8 +1015,7 @@ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { uint64_t df_attributes = GetDataFlowAttributes(mir); // If not a pseudo-op, note non-leaf or can throw - if (static_cast<int>(mir->dalvikInsn.opcode) < - static_cast<int>(kNumPackedOpcodes)) { + if (!IsPseudoMirOp(mir->dalvikInsn.opcode)) { int flags = Instruction::FlagsOf(mir->dalvikInsn.opcode); if ((flags & Instruction::kInvoke) != 0 && (mir->optimization_flags & MIR_INLINED) == 0) { @@ -1282,7 +1281,7 @@ bool MIRGraph::VerifyPredInfo(BasicBlock* bb) { GrowableArray<BasicBlockId>::Iterator iter(bb->predecessors); while (true) { - BasicBlock *pred_bb = GetBasicBlock(iter.Next()); + BasicBlock* pred_bb = GetBasicBlock(iter.Next()); if (!pred_bb) break; bool found = false; if (pred_bb->taken == bb->id) { diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index 0fffa01350..3ef1dbfac3 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -26,6 +26,7 @@ #include "dex/quick/dex_file_to_method_inliner_map.h" #include "dex/quick/dex_file_method_inliner.h" #include "leb128.h" +#include "pass_driver_me_post_opt.h" namespace art { @@ -353,7 +354,7 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffs /* Always terminate the current block for conditional branches */ if (flags & Instruction::kContinue) { - BasicBlock *fallthrough_block = FindBlock(cur_offset + width, + BasicBlock* fallthrough_block = FindBlock(cur_offset + width, /* * If the method is processed * in sequential order from the @@ -541,15 +542,14 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse * Note also that the dex_pc_to_block_map_ entry for the potentially * throwing instruction will refer to the original basic block. */ - BasicBlock *new_block = NewMemBB(kDalvikByteCode, num_blocks_++); + BasicBlock* new_block = NewMemBB(kDalvikByteCode, num_blocks_++); block_list_.Insert(new_block); new_block->start_offset = insn->offset; cur_block->fall_through = new_block->id; new_block->predecessors->Insert(cur_block->id); MIR* new_insn = NewMIR(); *new_insn = *insn; - insn->dalvikInsn.opcode = - static_cast<Instruction::Code>(kMirOpCheck); + insn->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpCheck); // Associate the two halves. insn->meta.throw_insn = new_insn; new_block->AppendMIR(new_insn); @@ -724,7 +724,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ } } current_offset_ += width; - BasicBlock *next_block = FindBlock(current_offset_, /* split */ false, /* create */ + BasicBlock* next_block = FindBlock(current_offset_, /* split */ false, /* create */ false, /* immed_pred_block_p */ NULL); if (next_block) { /* @@ -836,8 +836,7 @@ void MIRGraph::DumpCFG(const char* dir_prefix, bool all_blocks, const char *suff } else { fprintf(file, " {%04x %s %s %s\\l}%s\\\n", mir->offset, mir->ssa_rep ? GetDalvikDisassembly(mir) : - (opcode < kMirOpFirst) ? - Instruction::Name(mir->dalvikInsn.opcode) : + !IsPseudoMirOp(opcode) ? Instruction::Name(mir->dalvikInsn.opcode) : extended_mir_op_names_[opcode - kMirOpFirst], (mir->optimization_flags & MIR_IGNORE_RANGE_CHECK) != 0 ? " no_rangecheck" : " ", (mir->optimization_flags & MIR_IGNORE_NULL_CHECK) != 0 ? 
" no_nullcheck" : " ", @@ -1073,19 +1072,21 @@ bool BasicBlock::RemoveMIRList(MIR* first_list_mir, MIR* last_list_mir) { } } - // Remove the BB information and also find the after_list + // Remove the BB information and also find the after_list. for (MIR* mir = first_list_mir; mir != last_list_mir; mir = mir->next) { mir->bb = NullBasicBlockId; } after_list = last_list_mir->next; - // If there is nothing before the list, after_list is the first_mir + // If there is nothing before the list, after_list is the first_mir. if (before_list == nullptr) { first_mir_insn = after_list; + } else { + before_list->next = after_list; } - // If there is nothing after the list, before_list is last_mir + // If there is nothing after the list, before_list is last_mir. if (after_list == nullptr) { last_mir_insn = before_list; } @@ -1140,7 +1141,7 @@ char* MIRGraph::GetDalvikDisassembly(const MIR* mir) { nop = true; } - if (opcode >= kMirOpFirst) { + if (IsPseudoMirOp(opcode)) { str.append(extended_mir_op_names_[opcode - kMirOpFirst]); } else { dalvik_format = Instruction::FormatOf(insn.opcode); @@ -1418,25 +1419,6 @@ void MIRGraph::SSATransformationStart() { temp_bit_vector_ = new (temp_scoped_alloc_.get()) ArenaBitVector( temp_scoped_alloc_.get(), temp_bit_vector_size_, false, kBitMapRegisterV); - /* Compute the DFS order */ - ComputeDFSOrders(); - - /* Compute the dominator info */ - ComputeDominators(); - - /* Allocate data structures in preparation for SSA conversion */ - CompilerInitializeSSAConversion(); - - /* Find out the "Dalvik reg def x block" relation */ - ComputeDefBlockMatrix(); - - /* Insert phi nodes to dominance frontiers for all variables */ - InsertPhiNodes(); - - /* Rename register names by local defs and phi nodes */ - ClearAllVisitedFlags(); - DoDFSPreOrderSSARename(GetEntryBlock()); - // Update the maximum number of reachable blocks. max_num_reachable_blocks_ = num_reachable_blocks_; } @@ -1454,7 +1436,7 @@ void MIRGraph::SSATransformationEnd() { } void MIRGraph::ComputeTopologicalSortOrder() { - std::queue<BasicBlock *> q; + std::queue<BasicBlock*> q; std::map<int, int> visited_cnt_values; // Clear the nodes. @@ -1510,7 +1492,7 @@ void MIRGraph::ComputeTopologicalSortOrder() { while (q.size() > 0) { // Get top. - BasicBlock *bb = q.front(); + BasicBlock* bb = q.front(); q.pop(); DCHECK_EQ(bb->hidden, false); @@ -1528,7 +1510,7 @@ void MIRGraph::ComputeTopologicalSortOrder() { // Reduce visitedCnt for all the successors and add into the queue ones with visitedCnt equals to zero. ChildBlockIterator succIter(bb, this); - BasicBlock *successor = succIter.Next(); + BasicBlock* successor = succIter.Next(); while (successor != nullptr) { // one more predecessor was visited. 
visited_cnt_values[successor->id]--; @@ -1914,4 +1896,13 @@ BasicBlock* MIRGraph::CreateNewBB(BBType block_type) { return res; } +void MIRGraph::CalculateBasicBlockInformation() { + PassDriverMEPostOpt driver(cu_); + driver.Launch(); +} + +void MIRGraph::InitializeBasicBlockData() { + num_blocks_ = block_list_.Size(); +} + } // namespace art diff --git a/compiler/dex/mir_graph.h b/compiler/dex/mir_graph.h index 3655125182..38cd5ee449 100644 --- a/compiler/dex/mir_graph.h +++ b/compiler/dex/mir_graph.h @@ -905,11 +905,11 @@ class MIRGraph { return backward_branches_ + forward_branches_; } - bool IsPseudoMirOp(Instruction::Code opcode) { + static bool IsPseudoMirOp(Instruction::Code opcode) { return static_cast<int>(opcode) >= static_cast<int>(kMirOpFirst); } - bool IsPseudoMirOp(int opcode) { + static bool IsPseudoMirOp(int opcode) { return opcode >= static_cast<int>(kMirOpFirst); } @@ -924,7 +924,7 @@ class MIRGraph { void VerifyDataflow(); void CheckForDominanceFrontier(BasicBlock* dom_bb, const BasicBlock* succ_bb); void EliminateNullChecksAndInferTypesStart(); - bool EliminateNullChecksAndInferTypes(BasicBlock *bb); + bool EliminateNullChecksAndInferTypes(BasicBlock* bb); void EliminateNullChecksAndInferTypesEnd(); bool EliminateClassInitChecksGate(); bool EliminateClassInitChecks(BasicBlock* bb); @@ -1030,6 +1030,14 @@ class MIRGraph { void AllocateSSAUseData(MIR *mir, int num_uses); void AllocateSSADefData(MIR *mir, int num_defs); + void CalculateBasicBlockInformation(); + void InitializeBasicBlockData(); + void ComputeDFSOrders(); + void ComputeDefBlockMatrix(); + void ComputeDominators(); + void CompilerInitializeSSAConversion(); + void InsertPhiNodes(); + void DoDFSPreOrderSSARename(BasicBlock* block); /* * IsDebugBuild sanity check: keep track of the Dex PCs for catch entries so that later on @@ -1046,7 +1054,6 @@ class MIRGraph { void HandleSSADef(int* defs, int dalvik_reg, int reg_index); bool InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed); - void ComputeDFSOrders(); protected: int FindCommonParent(int block1, int block2); @@ -1055,7 +1062,6 @@ class MIRGraph { void HandleLiveInUse(ArenaBitVector* use_v, ArenaBitVector* def_v, ArenaBitVector* live_in_v, int dalvik_reg_id); void HandleDef(ArenaBitVector* def_v, int dalvik_reg_id); - void CompilerInitializeSSAConversion(); bool DoSSAConversion(BasicBlock* bb); bool InvokeUsesMethodStar(MIR* mir); int ParseInsn(const uint16_t* code_ptr, MIR::DecodedInstruction* decoded_instruction); @@ -1082,11 +1088,7 @@ class MIRGraph { BasicBlock* NextUnvisitedSuccessor(BasicBlock* bb); void MarkPreOrder(BasicBlock* bb); void RecordDFSOrders(BasicBlock* bb); - void ComputeDefBlockMatrix(); void ComputeDomPostOrderTraversal(BasicBlock* bb); - void ComputeDominators(); - void InsertPhiNodes(); - void DoDFSPreOrderSSARename(BasicBlock* block); void SetConstant(int32_t ssa_reg, int value); void SetConstantWide(int ssa_reg, int64_t value); int GetSSAUseCount(int s_reg); diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc index 1d4aef2183..1460ce631b 100644 --- a/compiler/dex/mir_optimization.cc +++ b/compiler/dex/mir_optimization.cc @@ -129,17 +129,16 @@ MIR* MIRGraph::FindMoveResult(BasicBlock* bb, MIR* mir) { BasicBlock* tbb = bb; mir = AdvanceMIR(&tbb, mir); while (mir != NULL) { - int opcode = mir->dalvikInsn.opcode; if ((mir->dalvikInsn.opcode == Instruction::MOVE_RESULT) || (mir->dalvikInsn.opcode == Instruction::MOVE_RESULT_OBJECT) || (mir->dalvikInsn.opcode == Instruction::MOVE_RESULT_WIDE)) { break; } 
// Keep going if pseudo op, otherwise terminate - if (opcode < kNumPackedOpcodes) { - mir = NULL; - } else { + if (IsPseudoMirOp(mir->dalvikInsn.opcode)) { mir = AdvanceMIR(&tbb, mir); + } else { + mir = NULL; } } return mir; @@ -320,9 +319,11 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { return true; } bool use_lvn = bb->use_lvn; + std::unique_ptr<ScopedArenaAllocator> allocator; std::unique_ptr<LocalValueNumbering> local_valnum; if (use_lvn) { - local_valnum.reset(LocalValueNumbering::Create(cu_)); + allocator.reset(ScopedArenaAllocator::Create(&cu_->arena_stack)); + local_valnum.reset(new (allocator.get()) LocalValueNumbering(cu_, allocator.get())); } while (bb != NULL) { for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { @@ -415,7 +416,8 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { // TODO: flesh out support for Mips. NOTE: llvm's select op doesn't quite work here. // TUNING: expand to support IF_xx compare & branches if (!cu_->compiler->IsPortable() && - (cu_->instruction_set == kThumb2 || cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) && + (cu_->instruction_set == kArm64 || cu_->instruction_set == kThumb2 || + cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) && IsInstructionIfCcZ(mir->dalvikInsn.opcode)) { BasicBlock* ft = GetBasicBlock(bb->fall_through); DCHECK(ft != NULL); @@ -441,6 +443,8 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { if (SelectKind(tk->last_mir_insn) == kSelectGoto) { tk->last_mir_insn->optimization_flags |= (MIR_IGNORE_SUSPEND_CHECK); } + + // TODO: Add logic for LONG. // Are the block bodies something we can handle? if ((ft->first_mir_insn == ft->last_mir_insn) && (tk->first_mir_insn != tk->last_mir_insn) && @@ -550,6 +554,9 @@ bool MIRGraph::BasicBlockOpt(BasicBlock* bb) { } bb = ((cu_->disable_opt & (1 << kSuppressExceptionEdges)) != 0) ? 
NextDominatedBlock(bb) : NULL; } + if (use_lvn && UNLIKELY(!local_valnum->Good())) { + LOG(WARNING) << "LVN overflow in " << PrettyMethod(cu_->method_idx, *cu_->dex_file); + } return true; } @@ -861,7 +868,7 @@ bool MIRGraph::EliminateNullChecksAndInferTypes(BasicBlock* bb) { struct BasicBlock* next_bb = GetBasicBlock(bb->fall_through); for (MIR* tmir = next_bb->first_mir_insn; tmir != NULL; tmir =tmir->next) { - if (static_cast<int>(tmir->dalvikInsn.opcode) >= static_cast<int>(kMirOpFirst)) { + if (IsPseudoMirOp(tmir->dalvikInsn.opcode)) { continue; } // First non-pseudo should be MOVE_RESULT_OBJECT @@ -1178,6 +1185,9 @@ void MIRGraph::InlineCalls(BasicBlock* bb) { return; } for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { + if (IsPseudoMirOp(mir->dalvikInsn.opcode)) { + continue; + } if (!(Instruction::FlagsOf(mir->dalvikInsn.opcode) & Instruction::kInvoke)) { continue; } diff --git a/compiler/dex/mir_optimization_test.cc b/compiler/dex/mir_optimization_test.cc index 86092b6e3d..69c394f168 100644 --- a/compiler/dex/mir_optimization_test.cc +++ b/compiler/dex/mir_optimization_test.cc @@ -193,7 +193,7 @@ class ClassInitCheckEliminationTest : public testing::Test { ASSERT_TRUE(gate_result); RepeatingPreOrderDfsIterator iterator(cu_.mir_graph.get()); bool change = false; - for (BasicBlock *bb = iterator.Next(change); bb != 0; bb = iterator.Next(change)) { + for (BasicBlock* bb = iterator.Next(change); bb != nullptr; bb = iterator.Next(change)) { change = cu_.mir_graph->EliminateClassInitChecks(bb); } cu_.mir_graph->EliminateClassInitChecksEnd(); diff --git a/compiler/dex/pass.h b/compiler/dex/pass.h index 4ce040e9ab..b4906d67df 100644 --- a/compiler/dex/pass.h +++ b/compiler/dex/pass.h @@ -89,6 +89,21 @@ class Pass { return false; } + static void BasePrintMessage(CompilationUnit* c_unit, const char* pass_name, const char* message, ...) { + // Check if we want to log something or not. + if (c_unit->print_pass) { + // Stringify the message. + va_list args; + va_start(args, message); + std::string stringified_message; + StringAppendV(&stringified_message, message, args); + va_end(args); + + // Log the message and ensure to include pass name. + LOG(INFO) << pass_name << ": " << stringified_message; + } + } + protected: /** @brief The pass name: used for searching for a pass when running a particular pass or debugging. */ const char* const pass_name_; diff --git a/compiler/dex/pass_driver.h b/compiler/dex/pass_driver.h index aa0d1ae462..bd8f53cd5a 100644 --- a/compiler/dex/pass_driver.h +++ b/compiler/dex/pass_driver.h @@ -141,7 +141,6 @@ class PassDriver { } } - protected: /** * @brief Gets the list of passes currently schedule to execute. * @return pass_list_ @@ -150,14 +149,27 @@ class PassDriver { return pass_list_; } - virtual void InitializePasses() { - SetDefaultPasses(); + static void SetPrintAllPasses() { + default_print_passes_ = true; + } + + static void SetDumpPassList(const std::string& list) { + dump_pass_list_ = list; + } + + static void SetPrintPassList(const std::string& list) { + print_pass_list_ = list; } void SetDefaultPasses() { pass_list_ = PassDriver<PassDriverType>::g_default_pass_list; } + protected: + virtual void InitializePasses() { + SetDefaultPasses(); + } + /** * @brief Apply a patch: perform start/work/end functions. */ @@ -185,6 +197,15 @@ class PassDriver { /** @brief The default pass list is used to initialize pass_list_. 
*/ static std::vector<const Pass*> g_default_pass_list; + + /** @brief Do we, by default, want to be printing the log messages? */ + static bool default_print_passes_; + + /** @brief What are the passes we want to be printing the log messages? */ + static std::string print_pass_list_; + + /** @brief What are the passes we want to be dumping the CFG? */ + static std::string dump_pass_list_; }; } // namespace art diff --git a/compiler/dex/pass_driver_me.cc b/compiler/dex/pass_driver_me.cc deleted file mode 100644 index d0545004f7..0000000000 --- a/compiler/dex/pass_driver_me.cc +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (C) 2014 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "base/macros.h" -#include "bb_optimizations.h" -#include "compiler_internals.h" -#include "dataflow_iterator.h" -#include "dataflow_iterator-inl.h" -#include "pass_driver_me.h" - -namespace art { - -namespace { // anonymous namespace - -void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass, DataflowIterator* iterator) { - // Paranoid: Check the iterator before walking the BasicBlocks. - DCHECK(iterator != nullptr); - bool change = false; - for (BasicBlock *bb = iterator->Next(change); bb != 0; bb = iterator->Next(change)) { - data->bb = bb; - change = pass->Worker(data); - } -} - -template <typename Iterator> -inline void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass) { - DCHECK(data != nullptr); - CompilationUnit* c_unit = data->c_unit; - DCHECK(c_unit != nullptr); - Iterator iterator(c_unit->mir_graph.get()); - DoWalkBasicBlocks(data, pass, &iterator); -} -} // anonymous namespace - -/* - * Create the pass list. These passes are immutable and are shared across the threads. - * - * Advantage is that there will be no race conditions here. - * Disadvantage is the passes can't change their internal states depending on CompilationUnit: - * - This is not yet an issue: no current pass would require it. - */ -// The initial list of passes to be used by the PassDriveME. -template<> -const Pass* const PassDriver<PassDriverME>::g_passes[] = { - GetPassInstance<CacheFieldLoweringInfo>(), - GetPassInstance<CacheMethodLoweringInfo>(), - GetPassInstance<CallInlining>(), - GetPassInstance<CodeLayout>(), - GetPassInstance<SSATransformation>(), - GetPassInstance<ConstantPropagation>(), - GetPassInstance<InitRegLocations>(), - GetPassInstance<MethodUseCount>(), - GetPassInstance<NullCheckEliminationAndTypeInference>(), - GetPassInstance<ClassInitCheckElimination>(), - GetPassInstance<BBCombine>(), - GetPassInstance<BBOptimizations>(), -}; - -// The number of the passes in the initial list of Passes (g_passes). -template<> -uint16_t const PassDriver<PassDriverME>::g_passes_size = arraysize(PassDriver<PassDriverME>::g_passes); - -// The default pass list is used by the PassDriverME instance of PassDriver to initialize pass_list_. 
-template<> -std::vector<const Pass*> PassDriver<PassDriverME>::g_default_pass_list(PassDriver<PassDriverME>::g_passes, PassDriver<PassDriverME>::g_passes + PassDriver<PassDriverME>::g_passes_size); - -PassDriverME::PassDriverME(CompilationUnit* cu) - : PassDriver(), pass_me_data_holder_(), dump_cfg_folder_("/sdcard/") { - pass_me_data_holder_.bb = nullptr; - pass_me_data_holder_.c_unit = cu; -} - -PassDriverME::~PassDriverME() { -} - -void PassDriverME::DispatchPass(const Pass* pass) { - VLOG(compiler) << "Dispatching " << pass->GetName(); - const PassME* me_pass = down_cast<const PassME*>(pass); - - DataFlowAnalysisMode mode = me_pass->GetTraversal(); - - switch (mode) { - case kPreOrderDFSTraversal: - DoWalkBasicBlocks<PreOrderDfsIterator>(&pass_me_data_holder_, me_pass); - break; - case kRepeatingPreOrderDFSTraversal: - DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(&pass_me_data_holder_, me_pass); - break; - case kRepeatingPostOrderDFSTraversal: - DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(&pass_me_data_holder_, me_pass); - break; - case kReversePostOrderDFSTraversal: - DoWalkBasicBlocks<ReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass); - break; - case kRepeatingReversePostOrderDFSTraversal: - DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass); - break; - case kPostOrderDOMTraversal: - DoWalkBasicBlocks<PostOrderDOMIterator>(&pass_me_data_holder_, me_pass); - break; - case kAllNodes: - DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass); - break; - case kNoNodes: - break; - default: - LOG(FATAL) << "Iterator mode not handled in dispatcher: " << mode; - break; - } -} - -bool PassDriverME::RunPass(const Pass* pass, bool time_split) { - // Paranoid: c_unit and pass cannot be nullptr, and the pass should have a name - DCHECK(pass != nullptr); - DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0); - CompilationUnit* c_unit = pass_me_data_holder_.c_unit; - DCHECK(c_unit != nullptr); - - // Do we perform a time split - if (time_split) { - c_unit->NewTimingSplit(pass->GetName()); - } - - // Check the pass gate first. - bool should_apply_pass = pass->Gate(&pass_me_data_holder_); - if (should_apply_pass) { - // Applying the pass: first start, doWork, and end calls. - ApplyPass(&pass_me_data_holder_, pass); - - // Do we want to log it? - if ((c_unit->enable_debug& (1 << kDebugDumpCFG)) != 0) { - // Do we have a pass folder? - const PassME* me_pass = (down_cast<const PassME*>(pass)); - const char* passFolder = me_pass->GetDumpCFGFolder(); - DCHECK(passFolder != nullptr); - - if (passFolder[0] != 0) { - // Create directory prefix. - std::string prefix = GetDumpCFGFolder(); - prefix += passFolder; - prefix += "/"; - - c_unit->mir_graph->DumpCFG(prefix.c_str(), false); - } - } - } - - // If the pass gate passed, we can declare success. 
- return should_apply_pass; -} - -const char* PassDriverME::GetDumpCFGFolder() const { - return dump_cfg_folder_; -} - - -} // namespace art diff --git a/compiler/dex/pass_driver_me.h b/compiler/dex/pass_driver_me.h index 0142934be2..7d76fb83d4 100644 --- a/compiler/dex/pass_driver_me.h +++ b/compiler/dex/pass_driver_me.h @@ -18,28 +18,155 @@ #define ART_COMPILER_DEX_PASS_DRIVER_ME_H_ #include "bb_optimizations.h" +#include "dataflow_iterator.h" +#include "dataflow_iterator-inl.h" #include "pass_driver.h" #include "pass_me.h" namespace art { -class PassDriverME: public PassDriver<PassDriverME> { +template <typename PassDriverType> +class PassDriverME: public PassDriver<PassDriverType> { public: - explicit PassDriverME(CompilationUnit* cu); - ~PassDriverME(); - /** - * @brief Dispatch a patch: walk the BasicBlocks depending on the traversal mode - */ - void DispatchPass(const Pass* pass); - bool RunPass(const Pass* pass, bool time_split = false); - const char* GetDumpCFGFolder() const; + explicit PassDriverME(CompilationUnit* cu) + : pass_me_data_holder_(), dump_cfg_folder_("/sdcard/") { + pass_me_data_holder_.bb = nullptr; + pass_me_data_holder_.c_unit = cu; + } + + ~PassDriverME() { + } + + void DispatchPass(const Pass* pass) { + VLOG(compiler) << "Dispatching " << pass->GetName(); + const PassME* me_pass = down_cast<const PassME*>(pass); + + DataFlowAnalysisMode mode = me_pass->GetTraversal(); + + switch (mode) { + case kPreOrderDFSTraversal: + DoWalkBasicBlocks<PreOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kRepeatingPreOrderDFSTraversal: + DoWalkBasicBlocks<RepeatingPreOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kRepeatingPostOrderDFSTraversal: + DoWalkBasicBlocks<RepeatingPostOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kReversePostOrderDFSTraversal: + DoWalkBasicBlocks<ReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kRepeatingReversePostOrderDFSTraversal: + DoWalkBasicBlocks<RepeatingReversePostOrderDfsIterator>(&pass_me_data_holder_, me_pass); + break; + case kPostOrderDOMTraversal: + DoWalkBasicBlocks<PostOrderDOMIterator>(&pass_me_data_holder_, me_pass); + break; + case kAllNodes: + DoWalkBasicBlocks<AllNodesIterator>(&pass_me_data_holder_, me_pass); + break; + case kNoNodes: + break; + default: + LOG(FATAL) << "Iterator mode not handled in dispatcher: " << mode; + break; + } + } + + bool RunPass(const Pass* pass, bool time_split) { + // Paranoid: c_unit and pass cannot be nullptr, and the pass should have a name + DCHECK(pass != nullptr); + DCHECK(pass->GetName() != nullptr && pass->GetName()[0] != 0); + CompilationUnit* c_unit = pass_me_data_holder_.c_unit; + DCHECK(c_unit != nullptr); + + // Do we perform a time split + if (time_split) { + c_unit->NewTimingSplit(pass->GetName()); + } + + // Check the pass gate first. + bool should_apply_pass = pass->Gate(&pass_me_data_holder_); + if (should_apply_pass) { + bool old_print_pass = c_unit->print_pass; + + c_unit->print_pass = PassDriver<PassDriverType>::default_print_passes_; + + const char* print_pass_list = PassDriver<PassDriverType>::print_pass_list_.c_str(); + + if (print_pass_list != nullptr && strstr(print_pass_list, pass->GetName()) != nullptr) { + c_unit->print_pass = true; + } + + // Applying the pass: first start, doWork, and end calls. 
+ this->ApplyPass(&pass_me_data_holder_, pass); + + bool should_dump = ((c_unit->enable_debug & (1 << kDebugDumpCFG)) != 0); + + const char* dump_pass_list = PassDriver<PassDriverType>::dump_pass_list_.c_str(); + + if (dump_pass_list != nullptr) { + bool found = strstr(dump_pass_list, pass->GetName()); + should_dump = (should_dump || found); + } + + if (should_dump) { + // Do we want to log it? + if ((c_unit->enable_debug& (1 << kDebugDumpCFG)) != 0) { + // Do we have a pass folder? + const PassME* me_pass = (down_cast<const PassME*>(pass)); + const char* passFolder = me_pass->GetDumpCFGFolder(); + DCHECK(passFolder != nullptr); + + if (passFolder[0] != 0) { + // Create directory prefix. + std::string prefix = GetDumpCFGFolder(); + prefix += passFolder; + prefix += "/"; + + c_unit->mir_graph->DumpCFG(prefix.c_str(), false); + } + } + } + + c_unit->print_pass = old_print_pass; + } + + // If the pass gate passed, we can declare success. + return should_apply_pass; + } + + const char* GetDumpCFGFolder() const { + return dump_cfg_folder_; + } + protected: /** @brief The data holder that contains data needed for the PassDriverME. */ PassMEDataHolder pass_me_data_holder_; /** @brief Dump CFG base folder: where is the base folder for dumping CFGs. */ const char* dump_cfg_folder_; -}; + static void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass, + DataflowIterator* iterator) { + // Paranoid: Check the iterator before walking the BasicBlocks. + DCHECK(iterator != nullptr); + bool change = false; + for (BasicBlock* bb = iterator->Next(change); bb != nullptr; bb = iterator->Next(change)) { + data->bb = bb; + change = pass->Worker(data); + } + } + + template <typename Iterator> + inline static void DoWalkBasicBlocks(PassMEDataHolder* data, const PassME* pass) { + DCHECK(data != nullptr); + CompilationUnit* c_unit = data->c_unit; + DCHECK(c_unit != nullptr); + Iterator iterator(c_unit->mir_graph.get()); + DoWalkBasicBlocks(data, pass, &iterator); + } +}; } // namespace art #endif // ART_COMPILER_DEX_PASS_DRIVER_ME_H_ + diff --git a/compiler/dex/pass_driver_me_opts.cc b/compiler/dex/pass_driver_me_opts.cc new file mode 100644 index 0000000000..52a2273c40 --- /dev/null +++ b/compiler/dex/pass_driver_me_opts.cc @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "base/macros.h" +#include "bb_optimizations.h" +#include "compiler_internals.h" +#include "dataflow_iterator.h" +#include "dataflow_iterator-inl.h" +#include "pass_driver_me_opts.h" + +namespace art { + +/* + * Create the pass list. These passes are immutable and are shared across the threads. + * + * Advantage is that there will be no race conditions here. + * Disadvantage is the passes can't change their internal states depending on CompilationUnit: + * - This is not yet an issue: no current pass would require it. + */ +// The initial list of passes to be used by the PassDriveMEOpts. 
+template<> +const Pass* const PassDriver<PassDriverMEOpts>::g_passes[] = { + GetPassInstance<CacheFieldLoweringInfo>(), + GetPassInstance<CacheMethodLoweringInfo>(), + GetPassInstance<CallInlining>(), + GetPassInstance<CodeLayout>(), + GetPassInstance<NullCheckEliminationAndTypeInference>(), + GetPassInstance<ClassInitCheckElimination>(), + GetPassInstance<BBCombine>(), + GetPassInstance<BBOptimizations>(), +}; + +// The number of the passes in the initial list of Passes (g_passes). +template<> +uint16_t const PassDriver<PassDriverMEOpts>::g_passes_size = + arraysize(PassDriver<PassDriverMEOpts>::g_passes); + +// The default pass list is used by the PassDriverME instance of PassDriver +// to initialize pass_list_. +template<> +std::vector<const Pass*> PassDriver<PassDriverMEOpts>::g_default_pass_list( + PassDriver<PassDriverMEOpts>::g_passes, + PassDriver<PassDriverMEOpts>::g_passes + + PassDriver<PassDriverMEOpts>::g_passes_size); + +// By default, do not have a dump pass list. +template<> +std::string PassDriver<PassDriverMEOpts>::dump_pass_list_ = std::string(); + +// By default, do not have a print pass list. +template<> +std::string PassDriver<PassDriverMEOpts>::print_pass_list_ = std::string(); + +// By default, we do not print the pass' information. +template<> +bool PassDriver<PassDriverMEOpts>::default_print_passes_ = false; + +void PassDriverMEOpts::ApplyPass(PassDataHolder* data, const Pass* pass) { + // First call the base class' version. + PassDriver::ApplyPass(data, pass); + + const PassME* pass_me = down_cast<const PassME*> (pass); + DCHECK(pass_me != nullptr); + + PassMEDataHolder* pass_me_data_holder = down_cast<PassMEDataHolder*>(data); + + // Now we care about flags. + if ((pass_me->GetFlag(kOptimizationBasicBlockChange) == true) || + (pass_me->GetFlag(kOptimizationDefUsesChange) == true)) { + CompilationUnit* c_unit = pass_me_data_holder->c_unit; + c_unit->mir_graph.get()->CalculateBasicBlockInformation(); + } +} + +} // namespace art diff --git a/compiler/dex/pass_driver_me_opts.h b/compiler/dex/pass_driver_me_opts.h new file mode 100644 index 0000000000..0a5b5aec99 --- /dev/null +++ b/compiler/dex/pass_driver_me_opts.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_PASS_DRIVER_ME_OPTS_H_ +#define ART_COMPILER_DEX_PASS_DRIVER_ME_OPTS_H_ + +#include "pass_driver_me.h" + +namespace art { + +// Forward Declarations. +struct CompilationUnit; +class Pass; +class PassDataHolder; + +class PassDriverMEOpts : public PassDriverME<PassDriverMEOpts> { + public: + explicit PassDriverMEOpts(CompilationUnit* cu):PassDriverME<PassDriverMEOpts>(cu) { + } + + ~PassDriverMEOpts() { + } + + /** + * @brief Apply a patch: perform start/work/end functions. 
+ */ + virtual void ApplyPass(PassDataHolder* data, const Pass* pass); +}; + +} // namespace art +#endif // ART_COMPILER_DEX_PASS_DRIVER_ME_OPTS_H_ diff --git a/compiler/dex/pass_driver_me_post_opt.cc b/compiler/dex/pass_driver_me_post_opt.cc new file mode 100644 index 0000000000..cb63f4184f --- /dev/null +++ b/compiler/dex/pass_driver_me_post_opt.cc @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "base/macros.h" +#include "post_opt_passes.h" +#include "compiler_internals.h" +#include "pass_driver_me_post_opt.h" + +namespace art { + +/* + * Create the pass list. These passes are immutable and are shared across the threads. + * + * Advantage is that there will be no race conditions here. + * Disadvantage is the passes can't change their internal states depending on CompilationUnit: + * - This is not yet an issue: no current pass would require it. + */ +// The initial list of passes to be used by the PassDriveMEPostOpt. +template<> +const Pass* const PassDriver<PassDriverMEPostOpt>::g_passes[] = { + GetPassInstance<InitializeData>(), + GetPassInstance<ClearPhiInstructions>(), + GetPassInstance<CalculatePredecessors>(), + GetPassInstance<DFSOrders>(), + GetPassInstance<BuildDomination>(), + GetPassInstance<DefBlockMatrix>(), + GetPassInstance<CreatePhiNodes>(), + GetPassInstance<ClearVisitedFlag>(), + GetPassInstance<SSAConversion>(), + GetPassInstance<PhiNodeOperands>(), + GetPassInstance<ConstantPropagation>(), + GetPassInstance<PerformInitRegLocations>(), + GetPassInstance<MethodUseCount>(), + GetPassInstance<FreeData>(), +}; + +// The number of the passes in the initial list of Passes (g_passes). +template<> +uint16_t const PassDriver<PassDriverMEPostOpt>::g_passes_size = + arraysize(PassDriver<PassDriverMEPostOpt>::g_passes); + +// The default pass list is used by the PassDriverME instance of PassDriver +// to initialize pass_list_. +template<> +std::vector<const Pass*> PassDriver<PassDriverMEPostOpt>::g_default_pass_list( + PassDriver<PassDriverMEPostOpt>::g_passes, + PassDriver<PassDriverMEPostOpt>::g_passes + + PassDriver<PassDriverMEPostOpt>::g_passes_size); + +// By default, do not have a dump pass list. +template<> +std::string PassDriver<PassDriverMEPostOpt>::dump_pass_list_ = std::string(); + +// By default, do not have a print pass list. +template<> +std::string PassDriver<PassDriverMEPostOpt>::print_pass_list_ = std::string(); + +// By default, we do not print the pass' information. 
+template<> +bool PassDriver<PassDriverMEPostOpt>::default_print_passes_ = false; + +} // namespace art diff --git a/compiler/dex/pass_driver_me_post_opt.h b/compiler/dex/pass_driver_me_post_opt.h new file mode 100644 index 0000000000..574a6ba04d --- /dev/null +++ b/compiler/dex/pass_driver_me_post_opt.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_PASS_DRIVER_ME_POST_OPT_H_ +#define ART_COMPILER_DEX_PASS_DRIVER_ME_POST_OPT_H_ + +#include "pass_driver_me.h" + +namespace art { + +// Forward Declarations. +struct CompilationUnit; +class Pass; +class PassDataHolder; + +class PassDriverMEPostOpt : public PassDriverME<PassDriverMEPostOpt> { + public: + explicit PassDriverMEPostOpt(CompilationUnit* cu) : PassDriverME<PassDriverMEPostOpt>(cu) { + } + + ~PassDriverMEPostOpt() { + } +}; + +} // namespace art +#endif // ART_COMPILER_DEX_PASS_DRIVER_ME_POST_OPT_H_ diff --git a/compiler/dex/pass_me.h b/compiler/dex/pass_me.h index 069fb45dc4..9efd5aeb40 100644 --- a/compiler/dex/pass_me.h +++ b/compiler/dex/pass_me.h @@ -32,6 +32,9 @@ class Pass; * @details Each enum should be a power of 2 to be correctly used. */ enum OptimizationFlag { + kOptimizationBasicBlockChange = 1, /**< @brief Has there been a change to a BasicBlock? */ + kOptimizationDefUsesChange = 2, /**< @brief Has there been a change to a def-use? */ + kLoopStructureChange = 4, /**< @brief Has there been a loop structural change? */ }; // Data holder class. @@ -93,7 +96,7 @@ class PassME: public Pass { /** @brief Type of traversal: determines the order to execute the pass on the BasicBlocks. */ const DataFlowAnalysisMode traversal_type_; - /** @brief Flags for additional directives: used to determine if a particular clean-up is necessary post pass. */ + /** @brief Flags for additional directives: used to determine if a particular post-optimization pass is necessary. */ const unsigned int flags_; /** @brief CFG Dump Folder: what sub-folder to use for dumping the CFGs post pass. */ diff --git a/compiler/dex/portable/mir_to_gbc.cc b/compiler/dex/portable/mir_to_gbc.cc index 576e2424fa..fd67608e73 100644 --- a/compiler/dex/portable/mir_to_gbc.cc +++ b/compiler/dex/portable/mir_to_gbc.cc @@ -712,7 +712,7 @@ bool MirConverter::ConvertMIRNode(MIR* mir, BasicBlock* bb, int opt_flags = mir->optimization_flags; if (cu_->verbose) { - if (op_val < kMirOpFirst) { + if (!IsPseudoMirOp(op_val)) { LOG(INFO) << ".. " << Instruction::Name(opcode) << " 0x" << std::hex << op_val; } else { LOG(INFO) << mir_graph_->extended_mir_op_names_[op_val - kMirOpFirst] << " 0x" << std::hex << op_val; @@ -1550,7 +1550,7 @@ void MirConverter::HandlePhiNodes(BasicBlock* bb, ::llvm::BasicBlock* llvm_bb) { SetDexOffset(bb->start_offset); for (MIR* mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { int opcode = mir->dalvikInsn.opcode; - if (opcode < kMirOpFirst) { + if (!IsPseudoMirOp(opcode)) { // Stop after first non-pseudo MIR op. 
continue; } @@ -1759,7 +1759,7 @@ bool MirConverter::BlockBitcodeConversion(BasicBlock* bb) { } } - if (opcode >= kMirOpFirst) { + if (IsPseudoMirOp(opcode)) { ConvertExtendedMIR(bb, mir, llvm_bb); continue; } diff --git a/compiler/dex/post_opt_passes.cc b/compiler/dex/post_opt_passes.cc new file mode 100644 index 0000000000..58700a4bd3 --- /dev/null +++ b/compiler/dex/post_opt_passes.cc @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "post_opt_passes.h" +#include "dataflow_iterator.h" +#include "dataflow_iterator-inl.h" + +namespace art { + +/* + * MethodUseCount pass implementation start. + */ +bool MethodUseCount::Gate(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + // First initialize the data. + c_unit->mir_graph->InitializeMethodUses(); + + // Now check if the pass is to be ignored. + bool res = ((c_unit->disable_opt & (1 << kPromoteRegs)) == 0); + + return res; +} + +bool MethodUseCount::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* c_unit = pass_me_data_holder->c_unit; + DCHECK(c_unit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); + c_unit->mir_graph->CountUses(bb); + // No need of repeating, so just return false. + return false; +} + + +bool ClearPhiInstructions::Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + const PassMEDataHolder* pass_me_data_holder = down_cast<const PassMEDataHolder*>(data); + CompilationUnit* c_unit = pass_me_data_holder->c_unit; + DCHECK(c_unit != nullptr); + BasicBlock* bb = pass_me_data_holder->bb; + DCHECK(bb != nullptr); + MIR* mir = bb->first_mir_insn; + + while (mir != nullptr) { + MIR* next = mir->next; + + Instruction::Code opcode = mir->dalvikInsn.opcode; + + if (opcode == static_cast<Instruction::Code> (kMirOpPhi)) { + bb->RemoveMIR(mir); + } + + mir = next; + } + + // We do not care in reporting a change or not in the MIR. + return false; +} + +void CalculatePredecessors::Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + // First get the MIRGraph here to factorize a bit the code. + MIRGraph *mir_graph = c_unit->mir_graph.get(); + + // First clear all predecessors. + AllNodesIterator first(mir_graph); + for (BasicBlock* bb = first.Next(); bb != nullptr; bb = first.Next()) { + bb->predecessors->Reset(); + } + + // Now calculate all predecessors. + AllNodesIterator second(mir_graph); + for (BasicBlock* bb = second.Next(); bb != nullptr; bb = second.Next()) { + // We only care about non hidden blocks. + if (bb->hidden == true) { + continue; + } + + // Create iterator for visiting children. 
+ ChildBlockIterator child_iter(bb, mir_graph); + + // Now iterate through the children to set the predecessor bits. + for (BasicBlock* child = child_iter.Next(); child != nullptr; child = child_iter.Next()) { + child->predecessors->Insert(bb->id); + } + } +} + +} // namespace art diff --git a/compiler/dex/post_opt_passes.h b/compiler/dex/post_opt_passes.h new file mode 100644 index 0000000000..f2035052c9 --- /dev/null +++ b/compiler/dex/post_opt_passes.h @@ -0,0 +1,284 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_DEX_POST_OPT_PASSES_H_ +#define ART_COMPILER_DEX_POST_OPT_PASSES_H_ + +#include "compiler_internals.h" +#include "pass_me.h" + +namespace art { + +/** + * @class InitializeData + * @brief There is some data that needs to be initialized before performing + * the post optimization passes. + */ +class InitializeData : public PassME { + public: + InitializeData() : PassME("InitializeData") { + } + + void Start(const PassDataHolder* data) const { + // New blocks may have been inserted so the first thing we do is ensure that + // the c_unit's number of blocks matches the actual count of basic blocks. + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->InitializeBasicBlockData(); + c_unit->mir_graph.get()->SSATransformationStart(); + } +}; + +/** + * @class MethodUseCount + * @brief Count the register uses of the method + */ +class MethodUseCount : public PassME { + public: + MethodUseCount() : PassME("UseCount") { + } + + bool Worker(const PassDataHolder* data) const; + + bool Gate(const PassDataHolder* data) const; +}; + +/** + * @class ClearPhiInformation + * @brief Clear the PHI nodes from the CFG. + */ +class ClearPhiInstructions : public PassME { + public: + ClearPhiInstructions() : PassME("ClearPhiInstructions") { + } + + bool Worker(const PassDataHolder* data) const; +}; + +/** + * @class CalculatePredecessors + * @brief Calculate the predecessor BitVector of each Basicblock. 
+ */ +class CalculatePredecessors : public PassME { + public: + CalculatePredecessors() : PassME("CalculatePredecessors") { + } + + void Start(const PassDataHolder* data) const; +}; + +/** + * @class DFSOrders + * @brief Compute the DFS order of the MIR graph + */ +class DFSOrders : public PassME { + public: + DFSOrders() : PassME("DFSOrders") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->ComputeDFSOrders(); + } +}; + +/** + * @class BuildDomination + * @brief Build the domination information of the MIR Graph + */ +class BuildDomination : public PassME { + public: + BuildDomination() : PassME("BuildDomination") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->ComputeDominators(); + c_unit->mir_graph.get()->CompilerInitializeSSAConversion(); + } + + void End(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + // Verify the dataflow information after the pass. + if (c_unit->enable_debug & (1 << kDebugVerifyDataflow)) { + c_unit->mir_graph->VerifyDataflow(); + } + } +}; + +/** + * @class DefBlockMatrix + * @brief Calculate the matrix of definition per basic block + */ +class DefBlockMatrix : public PassME { + public: + DefBlockMatrix() : PassME("DefBlockMatrix") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->ComputeDefBlockMatrix(); + } +}; + +/** + * @class CreatePhiNodes + * @brief Pass to create the phi nodes after SSA calculation + */ +class CreatePhiNodes : public PassME { + public: + CreatePhiNodes() : PassME("CreatePhiNodes") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->InsertPhiNodes(); + } +}; + +/** + * @class ClearVisitedFlag + * @brief Pass to clear the visited flag for all basic blocks. 
+ */ + +class ClearVisitedFlag : public PassME { + public: + ClearVisitedFlag() : PassME("ClearVisitedFlag") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->ClearAllVisitedFlags(); + } +}; + +/** + * @class SSAConversion + * @brief Pass for SSA conversion of MIRs + */ +class SSAConversion : public PassME { + public: + SSAConversion() : PassME("SSAConversion") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + MIRGraph *mir_graph = c_unit->mir_graph.get(); + mir_graph->DoDFSPreOrderSSARename(mir_graph->GetEntryBlock()); + } +}; + +/** + * @class PhiNodeOperands + * @brief Pass to insert the Phi node operands to basic blocks + */ +class PhiNodeOperands : public PassME { + public: + PhiNodeOperands() : PassME("PhiNodeOperands", kPreOrderDFSTraversal) { + } + + bool Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + BasicBlock* bb = down_cast<const PassMEDataHolder*>(data)->bb; + DCHECK(bb != nullptr); + c_unit->mir_graph->InsertPhiNodeOperands(bb); + // No need of repeating, so just return false. + return false; + } +}; + +/** + * @class InitRegLocations + * @brief Initialize Register Locations. + */ +class PerformInitRegLocations : public PassME { + public: + PerformInitRegLocations() : PassME("PerformInitRegLocation") { + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph->InitRegLocations(); + } +}; + +/** + * @class ConstantPropagation + * @brief Perform a constant propagation pass. + */ +class ConstantPropagation : public PassME { + public: + ConstantPropagation() : PassME("ConstantPropagation") { + } + + bool Worker(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + BasicBlock* bb = down_cast<const PassMEDataHolder*>(data)->bb; + DCHECK(bb != nullptr); + c_unit->mir_graph->DoConstantPropagation(bb); + // No need of repeating, so just return false. + return false; + } + + void Start(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph->InitializeConstantPropagation(); + } +}; + +/** + * @class FreeData + * @brief There is some data that needs to be freed after performing the post optimization passes. 
+ */ +class FreeData : public PassME { + public: + FreeData() : PassME("FreeData") { + } + + void End(const PassDataHolder* data) const { + DCHECK(data != nullptr); + CompilationUnit* c_unit = down_cast<const PassMEDataHolder*>(data)->c_unit; + DCHECK(c_unit != nullptr); + c_unit->mir_graph.get()->SSATransformationEnd(); + } +}; + +} // namespace art + +#endif // ART_COMPILER_DEX_POST_OPT_PASSES_H_ diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc index 5d74b8dee3..9f9e61845c 100644 --- a/compiler/dex/quick/arm/call_arm.cc +++ b/compiler/dex/quick/arm/call_arm.cc @@ -313,11 +313,11 @@ void ArmMir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { void ArmMir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Thread::ExceptionOffset<4>().Int32Value(); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - RegStorage reset_reg = AllocTemp(); - Load32Disp(rs_rARM_SELF, ex_offset, rl_result.reg); + RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); + RegStorage reset_reg = AllocTempRef(); + LoadRefDisp(rs_rARM_SELF, ex_offset, rl_result.reg); LoadConstant(reset_reg, 0); - Store32Disp(rs_rARM_SELF, ex_offset, reset_reg); + StoreRefDisp(rs_rARM_SELF, ex_offset, reset_reg); FreeTemp(reset_reg); StoreValue(rl_dest, rl_result); } diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h index f0a9ca4e82..9c801a520b 100644 --- a/compiler/dex/quick/arm/codegen_arm.h +++ b/compiler/dex/quick/arm/codegen_arm.h @@ -59,6 +59,7 @@ class ArmMir2Lir FINAL : public Mir2Lir { RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); + RegLocation LocCReturnRef(); RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc index bb02f74751..e06d814e16 100644 --- a/compiler/dex/quick/arm/fp_arm.cc +++ b/compiler/dex/quick/arm/fp_arm.cc @@ -51,7 +51,7 @@ void ArmMir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest, FlushAllRegs(); // Send everything to home location CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2, false); - rl_result = GetReturn(true); + rl_result = GetReturn(kFPReg); StoreValue(rl_dest, rl_result); return; case Instruction::NEG_FLOAT: @@ -94,7 +94,7 @@ void ArmMir2Lir::GenArithOpDouble(Instruction::Code opcode, FlushAllRegs(); // Send everything to home location CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2, false); - rl_result = GetReturnWide(true); + rl_result = GetReturnWide(kFPReg); StoreValueWide(rl_dest, rl_result); return; case Instruction::NEG_DOUBLE: @@ -141,8 +141,11 @@ void ArmMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, Re break; case Instruction::LONG_TO_DOUBLE: { rl_src = LoadValueWide(rl_src, kFPReg); - RegStorage src_low = rl_src.reg.DoubleToLowSingle(); - RegStorage src_high = rl_src.reg.DoubleToHighSingle(); + RegisterInfo* info = GetRegInfo(rl_src.reg); + RegStorage src_low = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg(); + DCHECK(src_low.Valid()); + RegStorage src_high = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg(); + DCHECK(src_high.Valid()); rl_result = EvalLoc(rl_dest, kFPReg, true); RegStorage tmp1 = AllocTempDouble(); RegStorage tmp2 = AllocTempDouble(); @@ -161,8 +164,11 @@ void ArmMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, Re 
return; case Instruction::LONG_TO_FLOAT: { rl_src = LoadValueWide(rl_src, kFPReg); - RegStorage src_low = rl_src.reg.DoubleToLowSingle(); - RegStorage src_high = rl_src.reg.DoubleToHighSingle(); + RegisterInfo* info = GetRegInfo(rl_src.reg); + RegStorage src_low = info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg(); + DCHECK(src_low.Valid()); + RegStorage src_high = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg(); + DCHECK(src_high.Valid()); rl_result = EvalLoc(rl_dest, kFPReg, true); // Allocate temp registers. RegStorage high_val = AllocTempDouble(); @@ -334,22 +340,11 @@ void ArmMir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) { bool ArmMir2Lir::GenInlinedSqrt(CallInfo* info) { DCHECK_EQ(cu_->instruction_set, kThumb2); - LIR *branch; RegLocation rl_src = info->args[0]; RegLocation rl_dest = InlineTargetWide(info); // double place for result rl_src = LoadValueWide(rl_src, kFPReg); RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true); NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg()); - NewLIR2(kThumb2Vcmpd, rl_result.reg.GetReg(), rl_result.reg.GetReg()); - NewLIR0(kThumb2Fmstat); - branch = NewLIR2(kThumbBCond, 0, kArmCondEq); - ClobberCallerSave(); - LockCallTemps(); // Using fixed registers - RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pSqrt)); - NewLIR3(kThumb2Fmrrd, rs_r0.GetReg(), rs_r1.GetReg(), rl_src.reg.GetReg()); - NewLIR1(kThumbBlxR, r_tgt.GetReg()); - NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), rs_r0.GetReg(), rs_r1.GetReg()); - branch->target = NewLIR0(kPseudoTargetLabel); StoreValueWide(rl_dest, rl_result); return true; } diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc index 2556788bed..769122d8ec 100644 --- a/compiler/dex/quick/arm/int_arm.cc +++ b/compiler/dex/quick/arm/int_arm.cc @@ -206,13 +206,16 @@ void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { RegLocation rl_result; RegLocation rl_src = mir_graph_->GetSrc(mir, 0); RegLocation rl_dest = mir_graph_->GetDest(mir); - rl_src = LoadValue(rl_src, kCoreReg); + // Avoid using float regs here. + RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg; + RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg; + rl_src = LoadValue(rl_src, src_reg_class); ConditionCode ccode = mir->meta.ccode; if (mir->ssa_rep->num_uses == 1) { // CONST case int true_val = mir->dalvikInsn.vB; int false_val = mir->dalvikInsn.vC; - rl_result = EvalLoc(rl_dest, kCoreReg, true); + rl_result = EvalLoc(rl_dest, result_reg_class, true); // Change kCondNe to kCondEq for the special cases below. if (ccode == kCondNe) { ccode = kCondEq; @@ -239,8 +242,8 @@ void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { OpEndIT(it); // Add a scheduling barrier to keep the IT shadow intact } else { // Unlikely case - could be tuned. 
- RegStorage t_reg1 = AllocTemp(); - RegStorage t_reg2 = AllocTemp(); + RegStorage t_reg1 = AllocTypedTemp(false, result_reg_class); + RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class); LoadConstant(t_reg1, true_val); LoadConstant(t_reg2, false_val); OpRegImm(kOpCmp, rl_src.reg, 0); @@ -253,9 +256,9 @@ void ArmMir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { // MOVE case RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]]; RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]]; - rl_true = LoadValue(rl_true, kCoreReg); - rl_false = LoadValue(rl_false, kCoreReg); - rl_result = EvalLoc(rl_dest, kCoreReg, true); + rl_true = LoadValue(rl_true, result_reg_class); + rl_false = LoadValue(rl_false, result_reg_class); + rl_result = EvalLoc(rl_dest, result_reg_class, true); OpRegImm(kOpCmp, rl_src.reg, 0); LIR* it = nullptr; if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) { // Is the "true" case already in place? @@ -814,10 +817,10 @@ bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // Release store semantics, get the barrier out of the way. TODO: revisit GenMemBarrier(kStoreLoad); - RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); + RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); RegLocation rl_new_value; if (!is_long) { - rl_new_value = LoadValue(rl_src_new_value, kCoreReg); + rl_new_value = LoadValue(rl_src_new_value); } else if (load_early) { rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg); } @@ -840,7 +843,7 @@ bool ArmMir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegLocation rl_expected; if (!is_long) { - rl_expected = LoadValue(rl_src_expected, kCoreReg); + rl_expected = LoadValue(rl_src_expected); } else if (load_early) { rl_expected = LoadValueWide(rl_src_expected, kCoreReg); } else { @@ -1047,7 +1050,7 @@ void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest, ThreadOffset<4> func_offset = QUICK_ENTRYPOINT_OFFSET(4, pLmul); FlushAllRegs(); CallRuntimeHelperRegLocationRegLocation(func_offset, rl_src1, rl_src2, false); - rl_result = GetReturnWide(false); + rl_result = GetReturnWide(kCoreReg); StoreValueWide(rl_dest, rl_result); return; } @@ -1126,7 +1129,7 @@ void ArmMir2Lir::GenMulLong(Instruction::Code opcode, RegLocation rl_dest, if (reg_status != 0) { // We had manually allocated registers for rl_result. // Now construct a RegLocation. - rl_result = GetReturnWide(false); // Just using as a template. + rl_result = GetReturnWide(kCoreReg); // Just using as a template. rl_result.reg = RegStorage::MakeRegPair(res_lo, res_hi); } @@ -1168,7 +1171,7 @@ void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, int data_offset; RegLocation rl_result; bool constant_index = rl_index.is_const; - rl_array = LoadValue(rl_array, kCoreReg); + rl_array = LoadValue(rl_array, kRefReg); if (!constant_index) { rl_index = LoadValue(rl_index, kCoreReg); } @@ -1203,7 +1206,7 @@ void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, reg_ptr = rl_array.reg; // NOTE: must not alter reg_ptr in constant case. 
} else { // No special indexed operation, lea + load w/ displacement - reg_ptr = AllocTemp(); + reg_ptr = AllocTempRef(); OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kArmLsl, scale)); FreeTemp(rl_index.reg); } @@ -1229,7 +1232,7 @@ void ArmMir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, } } else { // Offset base, then use indexed load - RegStorage reg_ptr = AllocTemp(); + RegStorage reg_ptr = AllocTempRef(); OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset); FreeTemp(rl_array.reg); rl_result = EvalLoc(rl_dest, reg_class, true); @@ -1267,7 +1270,7 @@ void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, data_offset += mir_graph_->ConstantValue(rl_index) << scale; } - rl_array = LoadValue(rl_array, kCoreReg); + rl_array = LoadValue(rl_array, kRefReg); if (!constant_index) { rl_index = LoadValue(rl_index, kCoreReg); } @@ -1281,7 +1284,7 @@ void ArmMir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, reg_ptr = rl_array.reg; } else { allocated_reg_ptr_temp = true; - reg_ptr = AllocTemp(); + reg_ptr = AllocTempRef(); } /* null object? */ diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index 1520c52a7a..5340d8370a 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -25,47 +25,43 @@ namespace art { -// TODO: rework this when c++11 support allows. -static const RegStorage core_regs_arr[] = +static constexpr RegStorage core_regs_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_rARM_SUSPEND, rs_r5, rs_r6, rs_r7, rs_r8, rs_rARM_SELF, rs_r10, rs_r11, rs_r12, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC}; -static const RegStorage sp_regs_arr[] = +static constexpr RegStorage sp_regs_arr[] = {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15, rs_fr16, rs_fr17, rs_fr18, rs_fr19, rs_fr20, rs_fr21, rs_fr22, rs_fr23, rs_fr24, rs_fr25, rs_fr26, rs_fr27, rs_fr28, rs_fr29, rs_fr30, rs_fr31}; -static const RegStorage dp_regs_arr[] = +static constexpr RegStorage dp_regs_arr[] = {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15}; -static const RegStorage reserved_regs_arr[] = +static constexpr RegStorage reserved_regs_arr[] = {rs_rARM_SUSPEND, rs_rARM_SELF, rs_rARM_SP, rs_rARM_LR, rs_rARM_PC}; -static const RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12}; -static const RegStorage sp_temps_arr[] = +static constexpr RegStorage core_temps_arr[] = {rs_r0, rs_r1, rs_r2, rs_r3, rs_r12}; +static constexpr RegStorage sp_temps_arr[] = {rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15}; -static const RegStorage dp_temps_arr[] = +static constexpr RegStorage dp_temps_arr[] = {rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7}; -static const std::vector<RegStorage> empty_pool; -static const std::vector<RegStorage> core_regs(core_regs_arr, - core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0])); -static const std::vector<RegStorage> sp_regs(sp_regs_arr, - sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0])); -static const std::vector<RegStorage> dp_regs(dp_regs_arr, - dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0])); -static const std::vector<RegStorage> reserved_regs(reserved_regs_arr, - reserved_regs_arr + sizeof(reserved_regs_arr) / 
sizeof(reserved_regs_arr[0])); -static const std::vector<RegStorage> core_temps(core_temps_arr, - core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0])); -static const std::vector<RegStorage> sp_temps(sp_temps_arr, - sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0])); -static const std::vector<RegStorage> dp_temps(dp_temps_arr, - dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0])); +static constexpr ArrayRef<const RegStorage> empty_pool; +static constexpr ArrayRef<const RegStorage> core_regs(core_regs_arr); +static constexpr ArrayRef<const RegStorage> sp_regs(sp_regs_arr); +static constexpr ArrayRef<const RegStorage> dp_regs(dp_regs_arr); +static constexpr ArrayRef<const RegStorage> reserved_regs(reserved_regs_arr); +static constexpr ArrayRef<const RegStorage> core_temps(core_temps_arr); +static constexpr ArrayRef<const RegStorage> sp_temps(sp_temps_arr); +static constexpr ArrayRef<const RegStorage> dp_temps(dp_temps_arr); RegLocation ArmMir2Lir::LocCReturn() { return arm_loc_c_return; } +RegLocation ArmMir2Lir::LocCReturnRef() { + return arm_loc_c_return; +} + RegLocation ArmMir2Lir::LocCReturnWide() { return arm_loc_c_return_wide; } @@ -575,10 +571,10 @@ void ArmMir2Lir::CompilerInitializeRegAlloc() { // Redirect single precision's master storage to master. info->SetMaster(dp_reg_info); // Singles should show a single 32-bit mask bit, at first referring to the low half. - DCHECK_EQ(info->StorageMask(), 0x1U); + DCHECK_EQ(info->StorageMask(), RegisterInfo::kLowSingleStorageMask); if (sp_reg_num & 1) { - // For odd singles, change to user the high word of the backing double. - info->SetStorageMask(0x2); + // For odd singles, change to use the high word of the backing double. + info->SetStorageMask(RegisterInfo::kHighSingleStorageMask); } } @@ -786,10 +782,13 @@ RegStorage ArmMir2Lir::AllocPreservedDouble(int s_reg) { } } if (res.Valid()) { + RegisterInfo* info = GetRegInfo(res); promotion_map_[p_map_idx].fp_location = kLocPhysReg; - promotion_map_[p_map_idx].FpReg = res.DoubleToLowSingle().GetReg(); + promotion_map_[p_map_idx].FpReg = + info->FindMatchingView(RegisterInfo::kLowSingleStorageMask)->GetReg().GetReg(); promotion_map_[p_map_idx+1].fp_location = kLocPhysReg; - promotion_map_[p_map_idx+1].FpReg = res.DoubleToHighSingle().GetReg(); + promotion_map_[p_map_idx+1].FpReg = + info->FindMatchingView(RegisterInfo::kHighSingleStorageMask)->GetReg().GetReg(); } return res; } diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc index 2e3ef86b9c..d0f8e741b6 100644 --- a/compiler/dex/quick/arm64/call_arm64.cc +++ b/compiler/dex/quick/arm64/call_arm64.cc @@ -287,9 +287,9 @@ void Arm64Mir2Lir::GenMonitorExit(int opt_flags, RegLocation rl_src) { void Arm64Mir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Thread::ExceptionOffset<8>().Int32Value(); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - Load32Disp(rs_rA64_SELF, ex_offset, rl_result.reg); - Store32Disp(rs_rA64_SELF, ex_offset, rs_xzr); + RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); + LoadRefDisp(rs_rA64_SELF, ex_offset, rl_result.reg); + StoreRefDisp(rs_rA64_SELF, ex_offset, rs_xzr); StoreValue(rl_dest, rl_result); } diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h index fddbfd79ac..6251f4f578 100644 --- a/compiler/dex/quick/arm64/codegen_arm64.h +++ b/compiler/dex/quick/arm64/codegen_arm64.h @@ -59,6 +59,7 @@ class Arm64Mir2Lir : public Mir2Lir { RegLocation GetReturnAlt(); 
RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); + RegLocation LocCReturnRef(); RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); @@ -222,8 +223,6 @@ class Arm64Mir2Lir : public Mir2Lir { bool skip_this); private: - void GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, int64_t val, - ConditionCode ccode); LIR* LoadFPConstantValue(int r_dest, int32_t value); LIR* LoadFPConstantValueWide(int r_dest, int64_t value); void ReplaceFixup(LIR* prev_lir, LIR* orig_lir, LIR* new_lir); diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc index 882ee6660d..acc7d17b56 100644 --- a/compiler/dex/quick/arm64/fp_arm64.cc +++ b/compiler/dex/quick/arm64/fp_arm64.cc @@ -47,7 +47,7 @@ void Arm64Mir2Lir::GenArithOpFloat(Instruction::Code opcode, RegLocation rl_dest FlushAllRegs(); // Send everything to home location CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmodf), rl_src1, rl_src2, false); - rl_result = GetReturn(true); + rl_result = GetReturn(kFPReg); StoreValue(rl_dest, rl_result); return; case Instruction::NEG_FLOAT: @@ -90,7 +90,7 @@ void Arm64Mir2Lir::GenArithOpDouble(Instruction::Code opcode, FlushAllRegs(); // Send everything to home location CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(8, pFmod), rl_src1, rl_src2, false); - rl_result = GetReturnWide(true); + rl_result = GetReturnWide(kFPReg); StoreValueWide(rl_dest, rl_result); return; case Instruction::NEG_DOUBLE: diff --git a/compiler/dex/quick/arm64/int_arm64.cc b/compiler/dex/quick/arm64/int_arm64.cc index 8dad90aba6..0a76b9b295 100644 --- a/compiler/dex/quick/arm64/int_arm64.cc +++ b/compiler/dex/quick/arm64/int_arm64.cc @@ -29,7 +29,6 @@ LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage s return OpCondBranch(cond, target); } -// TODO(Arm64): remove this. LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) { LOG(FATAL) << "Unexpected use of OpIT for Arm64"; return NULL; @@ -42,8 +41,8 @@ void Arm64Mir2Lir::OpEndIT(LIR* it) { /* * 64-bit 3way compare function. * cmp xA, xB - * csinc wC, wzr, wzr, eq - * csneg wC, wC, wC, le + * csinc wC, wzr, wzr, eq // wC = (xA == xB) ? 0 : 1 + * csneg wC, wC, wC, ge // wC = (xA >= xB) ? 
wC : -wC */ void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { @@ -53,10 +52,10 @@ void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); - NewLIR4(WIDE(kA64Csinc4rrrc), rl_result.reg.GetReg(), rxzr, rxzr, kArmCondEq); - NewLIR4(WIDE(kA64Csneg4rrrc), rl_result.reg.GetReg(), rl_result.reg.GetReg(), - rl_result.reg.GetReg(), kArmCondLe); - StoreValueWide(rl_dest, rl_result); + NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq); + NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(), + rl_result.reg.GetReg(), kArmCondGe); + StoreValue(rl_dest, rl_result); } void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, @@ -85,154 +84,60 @@ void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, StoreValueWide(rl_dest, rl_result); } -void Arm64Mir2Lir::GenFusedLongCmpImmBranch(BasicBlock* bb, RegLocation rl_src1, - int64_t val, ConditionCode ccode) { - LIR* taken = &block_label_list_[bb->taken]; - rl_src1 = LoadValueWide(rl_src1, kCoreReg); - - if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) { - ArmOpcode opcode = (ccode == kCondEq) ? kA64Cbz2rt : kA64Cbnz2rt; - LIR* branch = NewLIR2(WIDE(opcode), rl_src1.reg.GetLowReg(), 0); - branch->target = taken; - } else { - OpRegImm64(kOpCmp, rl_src1.reg, val); - OpCondBranch(ccode, taken); - } -} - void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { - // TODO(Arm64): implement this. - UNIMPLEMENTED(FATAL); - RegLocation rl_result; RegLocation rl_src = mir_graph_->GetSrc(mir, 0); RegLocation rl_dest = mir_graph_->GetDest(mir); - rl_src = LoadValue(rl_src, kCoreReg); - ConditionCode ccode = mir->meta.ccode; - if (mir->ssa_rep->num_uses == 1) { - // CONST case - int true_val = mir->dalvikInsn.vB; - int false_val = mir->dalvikInsn.vC; - rl_result = EvalLoc(rl_dest, kCoreReg, true); - // Change kCondNe to kCondEq for the special cases below. - if (ccode == kCondNe) { - ccode = kCondEq; - std::swap(true_val, false_val); - } - bool cheap_false_val = InexpensiveConstantInt(false_val); - if (cheap_false_val && ccode == kCondEq && (true_val == 0 || true_val == -1)) { - OpRegRegImm(kOpSub, rl_result.reg, rl_src.reg, -true_val); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - OpIT(true_val == 0 ? kCondNe : kCondUge, ""); - LoadConstant(rl_result.reg, false_val); - GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact - } else if (cheap_false_val && ccode == kCondEq && true_val == 1) { - OpRegRegImm(kOpRsub, rl_result.reg, rl_src.reg, 1); - DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - OpIT(kCondLs, ""); - LoadConstant(rl_result.reg, false_val); - GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact - } else if (cheap_false_val && InexpensiveConstantInt(true_val)) { - OpRegImm(kOpCmp, rl_src.reg, 0); - OpIT(ccode, "E"); - LoadConstant(rl_result.reg, true_val); - LoadConstant(rl_result.reg, false_val); - GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact - } else { - // Unlikely case - could be tuned. 
- RegStorage t_reg1 = AllocTemp(); - RegStorage t_reg2 = AllocTemp(); - LoadConstant(t_reg1, true_val); - LoadConstant(t_reg2, false_val); - OpRegImm(kOpCmp, rl_src.reg, 0); - OpIT(ccode, "E"); - OpRegCopy(rl_result.reg, t_reg1); - OpRegCopy(rl_result.reg, t_reg2); - GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact - } - } else { - // MOVE case - RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]]; - RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]]; - rl_true = LoadValue(rl_true, kCoreReg); - rl_false = LoadValue(rl_false, kCoreReg); - rl_result = EvalLoc(rl_dest, kCoreReg, true); - OpRegImm(kOpCmp, rl_src.reg, 0); - if (rl_result.reg.GetReg() == rl_true.reg.GetReg()) { // Is the "true" case already in place? - OpIT(NegateComparison(ccode), ""); - OpRegCopy(rl_result.reg, rl_false.reg); - } else if (rl_result.reg.GetReg() == rl_false.reg.GetReg()) { // False case in place? - OpIT(ccode, ""); - OpRegCopy(rl_result.reg, rl_true.reg); - } else { // Normal - select between the two. - OpIT(ccode, "E"); - OpRegCopy(rl_result.reg, rl_true.reg); - OpRegCopy(rl_result.reg, rl_false.reg); - } - GenBarrier(); // Add a scheduling barrier to keep the IT shadow intact - } + RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg; + RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg; + rl_src = LoadValue(rl_src, src_reg_class); + ArmConditionCode code = ArmConditionEncoding(mir->meta.ccode); + + RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]]; + RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]]; + rl_true = LoadValue(rl_true, result_reg_class); + rl_false = LoadValue(rl_false, result_reg_class); + rl_result = EvalLoc(rl_dest, result_reg_class, true); + OpRegImm(kOpCmp, rl_src.reg, 0); + NewLIR4(kA64Csel4rrrc, rl_result.reg.GetReg(), rl_true.reg.GetReg(), + rl_false.reg.GetReg(), code); StoreValue(rl_dest, rl_result); } void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) { - // TODO(Arm64): implement this. - UNIMPLEMENTED(FATAL); - RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0); RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2); + LIR* taken = &block_label_list_[bb->taken]; + LIR* not_taken = &block_label_list_[bb->fall_through]; + rl_src1 = LoadValueWide(rl_src1, kCoreReg); // Normalize such that if either operand is constant, src2 will be constant. ConditionCode ccode = mir->meta.ccode; if (rl_src1.is_const) { std::swap(rl_src1, rl_src2); ccode = FlipComparisonOrder(ccode); } + if (rl_src2.is_const) { - RegLocation rl_temp = UpdateLocWide(rl_src2); - // Do special compare/branch against simple const operand if not already in registers. + rl_src2 = UpdateLocWide(rl_src2); int64_t val = mir_graph_->ConstantValueWide(rl_src2); - if ((rl_temp.location != kLocPhysReg) - /*&& ((ModifiedImmediate(Low32Bits(val)) >= 0) && (ModifiedImmediate(High32Bits(val)) >= 0))*/) { - GenFusedLongCmpImmBranch(bb, rl_src1, val, ccode); + // Special handling using cbz & cbnz. + if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) { + OpCmpImmBranch(ccode, rl_src1.reg, 0, taken); + OpCmpImmBranch(NegateComparison(ccode), rl_src1.reg, 0, not_taken); + return; + // Only handle Imm if src2 is not already in a register. 
+ } else if (rl_src2.location != kLocPhysReg) { + OpRegImm64(kOpCmp, rl_src1.reg, val); + OpCondBranch(ccode, taken); + OpCondBranch(NegateComparison(ccode), not_taken); return; } } - LIR* taken = &block_label_list_[bb->taken]; - LIR* not_taken = &block_label_list_[bb->fall_through]; - rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); - OpRegReg(kOpCmp, rl_src1.reg.GetHigh(), rl_src2.reg.GetHigh()); - switch (ccode) { - case kCondEq: - OpCondBranch(kCondNe, not_taken); - break; - case kCondNe: - OpCondBranch(kCondNe, taken); - break; - case kCondLt: - OpCondBranch(kCondLt, taken); - OpCondBranch(kCondGt, not_taken); - ccode = kCondUlt; - break; - case kCondLe: - OpCondBranch(kCondLt, taken); - OpCondBranch(kCondGt, not_taken); - ccode = kCondLs; - break; - case kCondGt: - OpCondBranch(kCondGt, taken); - OpCondBranch(kCondLt, not_taken); - ccode = kCondHi; - break; - case kCondGe: - OpCondBranch(kCondGt, taken); - OpCondBranch(kCondLt, not_taken); - ccode = kCondUge; - break; - default: - LOG(FATAL) << "Unexpected ccode: " << ccode; - } - OpRegReg(kOpCmp, rl_src1.reg.GetLow(), rl_src2.reg.GetLow()); + OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); OpCondBranch(ccode, taken); + OpCondBranch(NegateComparison(ccode), not_taken); } /* @@ -468,7 +373,7 @@ bool Arm64Mir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) { RegLocation rl_dest = InlineTarget(info); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); - OpIT((is_min) ? kCondGt : kCondLt, "E"); + // OpIT((is_min) ? kCondGt : kCondLt, "E"); OpRegReg(kOpMov, rl_result.reg, rl_src2.reg); OpRegReg(kOpMov, rl_result.reg, rl_src1.reg); GenBarrier(); @@ -598,10 +503,10 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { // Release store semantics, get the barrier out of the way. TODO: revisit GenMemBarrier(kStoreLoad); - RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); + RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); RegLocation rl_new_value; if (!is_long) { - rl_new_value = LoadValue(rl_src_new_value, kCoreReg); + rl_new_value = LoadValue(rl_src_new_value); } else if (load_early) { rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg); } @@ -624,7 +529,7 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegLocation rl_expected; if (!is_long) { - rl_expected = LoadValue(rl_src_expected, kCoreReg); + rl_expected = LoadValue(rl_src_expected); } else if (load_early) { rl_expected = LoadValueWide(rl_src_expected, kCoreReg); } else { @@ -668,7 +573,7 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { NewLIR3(kA64Ldxr2rX, r_tmp.GetReg(), r_ptr.GetReg(), 0); OpRegReg(kOpSub, r_tmp, rl_expected.reg); DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - OpIT(kCondEq, "T"); + // OpIT(kCondEq, "T"); NewLIR4(kA64Stxr3wrX /* eq */, r_tmp.GetReg(), rl_new_value.reg.GetReg(), r_ptr.GetReg(), 0); } @@ -684,7 +589,7 @@ bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); OpRegRegImm(kOpRsub, rl_result.reg, r_tmp, 1); DCHECK(last_lir_insn_->u.m.def_mask & ENCODE_CCODE); - OpIT(kCondUlt, ""); + // OpIT(kCondUlt, ""); LoadConstant(rl_result.reg, 0); /* cc */ FreeTemp(r_tmp); // Now unneeded. 
@@ -866,7 +771,7 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, int data_offset; RegLocation rl_result; bool constant_index = rl_index.is_const; - rl_array = LoadValue(rl_array, kCoreReg); + rl_array = LoadValue(rl_array, kRefReg); if (!constant_index) { rl_index = LoadValue(rl_index, kCoreReg); } @@ -901,7 +806,7 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, reg_ptr = rl_array.reg; // NOTE: must not alter reg_ptr in constant case. } else { // No special indexed operation, lea + load w/ displacement - reg_ptr = AllocTemp(); + reg_ptr = AllocTempRef(); OpRegRegRegShift(kOpAdd, reg_ptr, rl_array.reg, rl_index.reg, EncodeShift(kA64Lsl, scale)); FreeTemp(rl_index.reg); } @@ -927,7 +832,7 @@ void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, } } else { // Offset base, then use indexed load - RegStorage reg_ptr = AllocTemp(); + RegStorage reg_ptr = AllocTempRef(); OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset); FreeTemp(rl_array.reg); rl_result = EvalLoc(rl_dest, reg_class, true); @@ -968,7 +873,7 @@ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, data_offset += mir_graph_->ConstantValue(rl_index) << scale; } - rl_array = LoadValue(rl_array, kCoreReg); + rl_array = LoadValue(rl_array, kRefReg); if (!constant_index) { rl_index = LoadValue(rl_index, kCoreReg); } @@ -982,7 +887,7 @@ void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, reg_ptr = rl_array.reg; } else { allocated_reg_ptr_temp = true; - reg_ptr = AllocTemp(); + reg_ptr = AllocTempRef(); } /* null object? */ diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index 02224476ff..b287399900 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -25,77 +25,72 @@ namespace art { -// TODO: rework this when c++11 support allows. 
-static const RegStorage core_regs_arr[] = +static constexpr RegStorage core_regs_arr[] = {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7, rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16, rs_w17, rs_w18, rs_w19, rs_w20, rs_w21, rs_w22, rs_w23, rs_w24, rs_w25, rs_w26, rs_w27, rs_w28, rs_w29, rs_w30, rs_w31, rs_wzr}; -static const RegStorage core64_regs_arr[] = +static constexpr RegStorage core64_regs_arr[] = {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7, rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, rs_x16, rs_x17, rs_x18, rs_x19, rs_x20, rs_x21, rs_x22, rs_x23, rs_x24, rs_x25, rs_x26, rs_x27, rs_x28, rs_x29, rs_x30, rs_x31, rs_xzr}; -static const RegStorage sp_regs_arr[] = +static constexpr RegStorage sp_regs_arr[] = {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7, rs_f8, rs_f9, rs_f10, rs_f11, rs_f12, rs_f13, rs_f14, rs_f15, rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23, rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31}; -static const RegStorage dp_regs_arr[] = +static constexpr RegStorage dp_regs_arr[] = {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7, rs_d8, rs_d9, rs_d10, rs_d11, rs_d12, rs_d13, rs_d14, rs_d15, rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23, rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31}; -static const RegStorage reserved_regs_arr[] = +static constexpr RegStorage reserved_regs_arr[] = {rs_rA32_SUSPEND, rs_rA32_SELF, rs_rA32_SP, rs_rA32_LR, rs_wzr}; -static const RegStorage reserved64_regs_arr[] = +static constexpr RegStorage reserved64_regs_arr[] = {rs_rA64_SUSPEND, rs_rA64_SELF, rs_rA64_SP, rs_rA64_LR, rs_xzr}; // TUNING: Are there too many temp registers and too less promote target? // This definition need to be matched with runtime.cc, quick entry assembly and JNI compiler // Note: we are not able to call to C function directly if it un-match C ABI. // Currently, rs_rA64_SELF is not a callee save register which does not match C ABI. 
-static const RegStorage core_temps_arr[] = +static constexpr RegStorage core_temps_arr[] = {rs_w0, rs_w1, rs_w2, rs_w3, rs_w4, rs_w5, rs_w6, rs_w7, rs_w8, rs_w9, rs_w10, rs_w11, rs_w12, rs_w13, rs_w14, rs_w15, rs_w16, rs_w17}; -static const RegStorage core64_temps_arr[] = +static constexpr RegStorage core64_temps_arr[] = {rs_x0, rs_x1, rs_x2, rs_x3, rs_x4, rs_x5, rs_x6, rs_x7, rs_x8, rs_x9, rs_x10, rs_x11, rs_x12, rs_x13, rs_x14, rs_x15, rs_x16, rs_x17}; -static const RegStorage sp_temps_arr[] = +static constexpr RegStorage sp_temps_arr[] = {rs_f0, rs_f1, rs_f2, rs_f3, rs_f4, rs_f5, rs_f6, rs_f7, rs_f16, rs_f17, rs_f18, rs_f19, rs_f20, rs_f21, rs_f22, rs_f23, rs_f24, rs_f25, rs_f26, rs_f27, rs_f28, rs_f29, rs_f30, rs_f31}; -static const RegStorage dp_temps_arr[] = +static constexpr RegStorage dp_temps_arr[] = {rs_d0, rs_d1, rs_d2, rs_d3, rs_d4, rs_d5, rs_d6, rs_d7, rs_d16, rs_d17, rs_d18, rs_d19, rs_d20, rs_d21, rs_d22, rs_d23, rs_d24, rs_d25, rs_d26, rs_d27, rs_d28, rs_d29, rs_d30, rs_d31}; -static const std::vector<RegStorage> core_regs(core_regs_arr, - core_regs_arr + arraysize(core_regs_arr)); -static const std::vector<RegStorage> core64_regs(core64_regs_arr, - core64_regs_arr + arraysize(core64_regs_arr)); -static const std::vector<RegStorage> sp_regs(sp_regs_arr, - sp_regs_arr + arraysize(sp_regs_arr)); -static const std::vector<RegStorage> dp_regs(dp_regs_arr, - dp_regs_arr + arraysize(dp_regs_arr)); -static const std::vector<RegStorage> reserved_regs(reserved_regs_arr, - reserved_regs_arr + arraysize(reserved_regs_arr)); -static const std::vector<RegStorage> reserved64_regs(reserved64_regs_arr, - reserved64_regs_arr + arraysize(reserved64_regs_arr)); -static const std::vector<RegStorage> core_temps(core_temps_arr, - core_temps_arr + arraysize(core_temps_arr)); -static const std::vector<RegStorage> core64_temps(core64_temps_arr, - core64_temps_arr + arraysize(core64_temps_arr)); -static const std::vector<RegStorage> sp_temps(sp_temps_arr, sp_temps_arr + arraysize(sp_temps_arr)); -static const std::vector<RegStorage> dp_temps(dp_temps_arr, dp_temps_arr + arraysize(dp_temps_arr)); +static constexpr ArrayRef<const RegStorage> core_regs(core_regs_arr); +static constexpr ArrayRef<const RegStorage> core64_regs(core64_regs_arr); +static constexpr ArrayRef<const RegStorage> sp_regs(sp_regs_arr); +static constexpr ArrayRef<const RegStorage> dp_regs(dp_regs_arr); +static constexpr ArrayRef<const RegStorage> reserved_regs(reserved_regs_arr); +static constexpr ArrayRef<const RegStorage> reserved64_regs(reserved64_regs_arr); +static constexpr ArrayRef<const RegStorage> core_temps(core_temps_arr); +static constexpr ArrayRef<const RegStorage> core64_temps(core64_temps_arr); +static constexpr ArrayRef<const RegStorage> sp_temps(sp_temps_arr); +static constexpr ArrayRef<const RegStorage> dp_temps(dp_temps_arr); RegLocation Arm64Mir2Lir::LocCReturn() { return arm_loc_c_return; } +RegLocation Arm64Mir2Lir::LocCReturnRef() { + return arm_loc_c_return; +} + RegLocation Arm64Mir2Lir::LocCReturnWide() { return arm_loc_c_return_wide; } @@ -258,7 +253,6 @@ static uint64_t RepeatBitsAcrossReg(bool is_wide, uint64_t value, unsigned width unsigned i; unsigned reg_size = (is_wide) ? 
64 : 32; uint64_t result = value & BIT_MASK(width); - DCHECK_NE(width, reg_size); for (i = width; i < reg_size; i *= 2) { result |= (result << i); } @@ -573,7 +567,7 @@ RegisterClass Arm64Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volat if (UNLIKELY(is_volatile)) { // On arm64, fp register load/store is atomic only for single bytes. if (size != kSignedByte && size != kUnsignedByte) { - return kCoreReg; + return (size == kReference) ? kRefReg : kCoreReg; } } return RegClassBySize(size); @@ -836,7 +830,7 @@ void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { int num_fpr_used = 0; /* - * Dummy up a RegLocation for the incoming Method* + * Dummy up a RegLocation for the incoming StackReference<mirror::ArtMethod> * It will attempt to keep kArg0 live (or copy it to home location * if promoted). */ @@ -845,14 +839,10 @@ void Arm64Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { rl_src.reg = TargetReg(kArg0); rl_src.home = false; MarkLive(rl_src); - - // rl_method might be 32-bit, but ArtMethod* on stack is 64-bit, so always flush it. - StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0)); - - // If Method* has been promoted, load it, - // otherwise, rl_method is the 32-bit value on [sp], and has already been loaded. + StoreValue(rl_method, rl_src); + // If Method* has been promoted, explicitly flush if (rl_method.location == kLocPhysReg) { - StoreValue(rl_method, rl_src); + StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0)); } if (cu_->num_ins == 0) { @@ -909,9 +899,7 @@ int Arm64Mir2Lir::LoadArgRegs(CallInfo* info, int call_state, RegLocation rl_arg = info->args[next_arg++]; rl_arg = UpdateRawLoc(rl_arg); if (rl_arg.wide && (next_reg <= TargetReg(kArg2).GetReg())) { - RegStorage r_tmp(RegStorage::k64BitPair, next_reg, next_reg + 1); - LoadValueDirectWideFixed(rl_arg, r_tmp); - next_reg++; + LoadValueDirectWideFixed(rl_arg, RegStorage::Solo64(next_reg)); next_arg++; } else { if (rl_arg.wide) { diff --git a/compiler/dex/quick/arm64/utility_arm64.cc b/compiler/dex/quick/arm64/utility_arm64.cc index eca0d2fa82..d0ab4f6844 100644 --- a/compiler/dex/quick/arm64/utility_arm64.cc +++ b/compiler/dex/quick/arm64/utility_arm64.cc @@ -504,7 +504,7 @@ LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_s CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit()); CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit()); if (EncodingMap[opcode].flags & IS_QUAD_OP) { - DCHECK_EQ(shift, ENCODE_NO_SHIFT); + DCHECK(!IsExtendEncoding(shift)); return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift); } else { DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP); @@ -706,40 +706,46 @@ bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) { LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest, int scale, OpSize size) { LIR* load; + int expected_scale = 0; ArmOpcode opcode = kA64Brk1d; - ArmOpcode wide = kA64NotWide; - - DCHECK(scale == 0 || scale == 1); if (r_dest.IsFloat()) { - bool is_double = r_dest.IsDouble(); - bool is_single = !is_double; - DCHECK_EQ(is_single, r_dest.IsSingle()); - - // If r_dest is a single, then size must be either k32 or kSingle. - // If r_dest is a double, then size must be either k64 or kDouble. - DCHECK(!is_single || size == k32 || size == kSingle); - DCHECK(!is_double || size == k64 || size == kDouble); - return NewLIR4((is_double) ? 
FWIDE(kA64Ldr4fXxG) : kA64Ldr4fXxG, - r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), scale); + if (r_dest.IsDouble()) { + DCHECK(size == k64 || size == kDouble); + expected_scale = 3; + opcode = FWIDE(kA64Ldr4fXxG); + } else { + DCHECK(r_dest.IsSingle()); + DCHECK(size == k32 || size == kSingle); + expected_scale = 2; + opcode = kA64Ldr4fXxG; + } + + DCHECK(scale == 0 || scale == expected_scale); + return NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), + (scale != 0) ? 1 : 0); } switch (size) { case kDouble: case kWord: case k64: - wide = kA64Wide; - // Intentional fall-trough. + opcode = WIDE(kA64Ldr4rXxG); + expected_scale = 3; + break; case kSingle: case k32: case kReference: opcode = kA64Ldr4rXxG; + expected_scale = 2; break; case kUnsignedHalf: opcode = kA64Ldrh4wXxd; + expected_scale = 1; break; case kSignedHalf: opcode = kA64Ldrsh4rXxd; + expected_scale = 1; break; case kUnsignedByte: opcode = kA64Ldrb3wXx; @@ -751,13 +757,14 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto LOG(FATAL) << "Bad size: " << size; } - if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) { - // Tertiary ops (e.g. ldrb, ldrsb) do not support scale. + if (UNLIKELY(expected_scale == 0)) { + // This is a tertiary op (e.g. ldrb, ldrsb), it does not not support scale. + DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U); DCHECK_EQ(scale, 0); - load = NewLIR3(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg()); + load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg()); } else { - DCHECK(scale == 0 || scale == ((wide == kA64Wide) ? 3 : 2)); - load = NewLIR4(opcode | wide, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), + DCHECK(scale == 0 || scale == expected_scale); + load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(), (scale != 0) ? 1 : 0); } @@ -767,39 +774,43 @@ LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegSto LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src, int scale, OpSize size) { LIR* store; + int expected_scale = 0; ArmOpcode opcode = kA64Brk1d; - ArmOpcode wide = kA64NotWide; - - DCHECK(scale == 0 || scale == 1); if (r_src.IsFloat()) { - bool is_double = r_src.IsDouble(); - bool is_single = !is_double; - DCHECK_EQ(is_single, r_src.IsSingle()); - - // If r_src is a single, then size must be either k32 or kSingle. - // If r_src is a double, then size must be either k64 or kDouble. - DCHECK(!is_single || size == k32 || size == kSingle); - DCHECK(!is_double || size == k64 || size == kDouble); - return NewLIR4((is_double) ? FWIDE(kA64Str4fXxG) : kA64Str4fXxG, - r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale); + if (r_src.IsDouble()) { + DCHECK(size == k64 || size == kDouble); + expected_scale = 3; + opcode = FWIDE(kA64Str4fXxG); + } else { + DCHECK(r_src.IsSingle()); + DCHECK(size == k32 || size == kSingle); + expected_scale = 2; + opcode = kA64Str4fXxG; + } + + DCHECK(scale == 0 || scale == expected_scale); + return NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), + (scale != 0) ? 1 : 0); } switch (size) { case kDouble: // Intentional fall-trough. case kWord: // Intentional fall-trough. case k64: - opcode = kA64Str4rXxG; - wide = kA64Wide; + opcode = WIDE(kA64Str4rXxG); + expected_scale = 3; break; case kSingle: // Intentional fall-trough. case k32: // Intentional fall-trough. 
case kReference: opcode = kA64Str4rXxG; + expected_scale = 2; break; case kUnsignedHalf: case kSignedHalf: opcode = kA64Strh4wXxd; + expected_scale = 1; break; case kUnsignedByte: case kSignedByte: @@ -809,12 +820,14 @@ LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegSt LOG(FATAL) << "Bad size: " << size; } - if (UNLIKELY((EncodingMap[opcode].flags & IS_TERTIARY_OP) != 0)) { - // Tertiary ops (e.g. strb) do not support scale. + if (UNLIKELY(expected_scale == 0)) { + // This is a tertiary op (e.g. strb), it does not not support scale. + DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U); DCHECK_EQ(scale, 0); - store = NewLIR3(opcode | wide, r_src.GetReg(), r_base.GetReg(), r_index.GetReg()); + store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg()); } else { - store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), scale); + store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(), + (scale != 0) ? 1 : 0); } return store; @@ -842,8 +855,8 @@ LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStor opcode = FWIDE(kA64Ldr3fXD); alt_opcode = FWIDE(kA64Ldur3fXd); } else { - opcode = FWIDE(kA64Ldr3rXD); - alt_opcode = FWIDE(kA64Ldur3rXd); + opcode = WIDE(kA64Ldr3rXD); + alt_opcode = WIDE(kA64Ldur3rXd); } break; case kSingle: // Intentional fall-through. diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 256135df71..3fbbc4eba7 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1201,21 +1201,27 @@ std::vector<uint8_t>* Mir2Lir::ReturnCallFrameInformation() { } RegLocation Mir2Lir::NarrowRegLoc(RegLocation loc) { - loc.wide = false; if (loc.location == kLocPhysReg) { + DCHECK(!loc.reg.Is32Bit()); if (loc.reg.IsPair()) { - loc.reg = loc.reg.GetLow(); + RegisterInfo* info_lo = GetRegInfo(loc.reg.GetLow()); + RegisterInfo* info_hi = GetRegInfo(loc.reg.GetHigh()); + info_lo->SetIsWide(false); + info_hi->SetIsWide(false); + loc.reg = info_lo->GetReg(); } else { - // FIXME: temp workaround. - // Issue here: how do we narrow to a 32-bit value in 64-bit container? - // Probably the wrong thing to narrow the RegStorage container here. That - // should be a target decision. At the RegLocation level, we're only - // modifying the view of the Dalvik value - this is orthogonal to the storage - // container size. Consider this a temp workaround. - DCHECK(loc.reg.IsDouble()); - loc.reg = loc.reg.DoubleToLowSingle(); + RegisterInfo* info = GetRegInfo(loc.reg); + RegisterInfo* info_new = info->FindMatchingView(RegisterInfo::k32SoloStorageMask); + DCHECK(info_new != nullptr); + if (info->IsLive() && (info->SReg() == loc.s_reg_low)) { + info->MarkDead(); + info_new->MarkLive(loc.s_reg_low); + } + loc.reg = info_new->GetReg(); } + DCHECK(loc.reg.Valid()); } + loc.wide = false; return loc; } diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 526c981ae9..6397208790 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -43,6 +43,7 @@ MIR* AllocReplacementMIR(MIRGraph* mir_graph, MIR* invoke, MIR* move_return) { uint32_t GetInvokeReg(MIR* invoke, uint32_t arg) { DCHECK_LT(arg, invoke->dalvikInsn.vA); + DCHECK(!MIRGraph::IsPseudoMirOp(invoke->dalvikInsn.opcode)); if (Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc) { return invoke->dalvikInsn.vC + arg; // Non-range invoke. 
} else { @@ -53,6 +54,7 @@ uint32_t GetInvokeReg(MIR* invoke, uint32_t arg) { bool WideArgIsInConsecutiveDalvikRegs(MIR* invoke, uint32_t arg) { DCHECK_LT(arg + 1, invoke->dalvikInsn.vA); + DCHECK(!MIRGraph::IsPseudoMirOp(invoke->dalvikInsn.opcode)); return Instruction::FormatOf(invoke->dalvikInsn.opcode) == Instruction::k3rc || invoke->dalvikInsn.arg[arg + 1u] == invoke->dalvikInsn.arg[arg] + 1u; } diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 7e3c8ce7e7..62c81d05bb 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -220,6 +220,8 @@ void Mir2Lir::ForceImplicitNullCheck(RegStorage reg, int opt_flags) { void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, RegLocation rl_src2, LIR* taken, LIR* fall_through) { + DCHECK(!rl_src1.fp); + DCHECK(!rl_src2.fp); ConditionCode cond; switch (opcode) { case Instruction::IF_EQ: @@ -253,7 +255,7 @@ void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, cond = FlipComparisonOrder(cond); } - rl_src1 = LoadValue(rl_src1, kCoreReg); + rl_src1 = LoadValue(rl_src1); // Is this really an immediate comparison? if (rl_src2.is_const) { // If it's already live in a register or not easily materialized, just keep going @@ -265,14 +267,15 @@ void Mir2Lir::GenCompareAndBranch(Instruction::Code opcode, RegLocation rl_src1, return; } } - rl_src2 = LoadValue(rl_src2, kCoreReg); + rl_src2 = LoadValue(rl_src2); OpCmpBranch(cond, rl_src1.reg, rl_src2.reg, taken); } void Mir2Lir::GenCompareZeroAndBranch(Instruction::Code opcode, RegLocation rl_src, LIR* taken, LIR* fall_through) { ConditionCode cond; - rl_src = LoadValue(rl_src, kCoreReg); + DCHECK(!rl_src.fp); + rl_src = LoadValue(rl_src); switch (opcode) { case Instruction::IF_EQZ: cond = kCondEq; @@ -371,7 +374,7 @@ static void GenNewArrayImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocArrayWithAccessCheck); mir_to_lir->CallRuntimeHelperImmMethodRegLocation(func_offset, type_idx, rl_src, true); } - RegLocation rl_result = mir_to_lir->GetReturn(false); + RegLocation rl_result = mir_to_lir->GetReturn(kRefReg); mir_to_lir->StoreValue(rl_dest, rl_result); } @@ -503,7 +506,7 @@ void Mir2Lir::GenFilledNewArray(CallInfo* info) { } } if (info->result.location != kLocInvalid) { - StoreValue(info->result, GetReturn(false /* not fp */)); + StoreValue(info->result, GetReturn(kRefReg)); } } @@ -562,8 +565,8 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double, RegStorage r_base; if (field_info.IsReferrersClass()) { // Fast path, static storage base is this method's class - RegLocation rl_method = LoadCurrMethod(); - r_base = AllocTemp(); + RegLocation rl_method = LoadCurrMethod(); + r_base = AllocTempRef(); LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base); if (IsTemp(rl_method.reg)) { FreeTemp(rl_method.reg); @@ -603,6 +606,8 @@ void Mir2Lir::GenSput(MIR* mir, RegLocation rl_src, bool is_long_or_double, field_info.StorageIndex(), r_base)); FreeTemp(r_tmp); + // Ensure load of status and load of value don't re-order. 
+ GenMemBarrier(kLoadLoad); } FreeTemp(r_method); } @@ -658,7 +663,7 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, if (field_info.IsReferrersClass()) { // Fast path, static storage base is this method's class RegLocation rl_method = LoadCurrMethod(); - r_base = AllocTemp(); + r_base = AllocTempRef(); LoadRefDisp(rl_method.reg, mirror::ArtMethod::DeclaringClassOffset().Int32Value(), r_base); } else { // Medium path, static storage base in a different class which requires checks that the other @@ -694,6 +699,8 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, field_info.StorageIndex(), r_base)); FreeTemp(r_tmp); + // Ensure load of status and load of value don't re-order. + GenMemBarrier(kLoadLoad); } FreeTemp(r_method); } @@ -726,10 +733,10 @@ void Mir2Lir::GenSget(MIR* mir, RegLocation rl_dest, GenSgetCall<4>(this, is_long_or_double, is_object, &field_info); } if (is_long_or_double) { - RegLocation rl_result = GetReturnWide(rl_dest.fp); + RegLocation rl_result = GetReturnWide(LocToRegClass(rl_dest)); StoreValueWide(rl_dest, rl_result); } else { - RegLocation rl_result = GetReturn(rl_dest.fp); + RegLocation rl_result = GetReturn(LocToRegClass(rl_dest)); StoreValue(rl_dest, rl_result); } } @@ -766,7 +773,7 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size, (!field_info.IsVolatile() || SupportsVolatileLoadStore(load_size))) { RegisterClass reg_class = RegClassForFieldLoadStore(load_size, field_info.IsVolatile()); DCHECK_GE(field_info.FieldOffset().Int32Value(), 0); - rl_obj = LoadValue(rl_obj, kCoreReg); + rl_obj = LoadValue(rl_obj, kRefReg); GenNullCheck(rl_obj.reg, opt_flags); RegLocation rl_result = EvalLoc(rl_dest, reg_class, true); int field_offset = field_info.FieldOffset().Int32Value(); @@ -793,10 +800,10 @@ void Mir2Lir::GenIGet(MIR* mir, int opt_flags, OpSize size, GenIgetCall<4>(this, is_long_or_double, is_object, &field_info, rl_obj); } if (is_long_or_double) { - RegLocation rl_result = GetReturnWide(rl_dest.fp); + RegLocation rl_result = GetReturnWide(LocToRegClass(rl_dest)); StoreValueWide(rl_dest, rl_result); } else { - RegLocation rl_result = GetReturn(rl_dest.fp); + RegLocation rl_result = GetReturn(LocToRegClass(rl_dest)); StoreValue(rl_dest, rl_result); } } @@ -824,7 +831,7 @@ void Mir2Lir::GenIPut(MIR* mir, int opt_flags, OpSize size, (!field_info.IsVolatile() || SupportsVolatileLoadStore(store_size))) { RegisterClass reg_class = RegClassForFieldLoadStore(store_size, field_info.IsVolatile()); DCHECK_GE(field_info.FieldOffset().Int32Value(), 0); - rl_obj = LoadValue(rl_obj, kCoreReg); + rl_obj = LoadValue(rl_obj, kRefReg); if (is_long_or_double) { rl_src = LoadValueWide(rl_src, reg_class); } else { @@ -881,7 +888,7 @@ void Mir2Lir::GenArrayObjPut(int opt_flags, RegLocation rl_array, RegLocation rl void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { RegLocation rl_method = LoadCurrMethod(); RegStorage res_reg = AllocTemp(); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); if (!cu_->compiler_driver->CanAccessTypeWithoutChecks(cu_->method_idx, *cu_->dex_file, type_idx)) { @@ -894,15 +901,15 @@ void Mir2Lir::GenConstClass(uint32_t type_idx, RegLocation rl_dest) { CallRuntimeHelperImmReg(QUICK_ENTRYPOINT_OFFSET(4, pInitializeTypeAndVerifyAccess), type_idx, rl_method.reg, true); } - RegLocation rl_result = GetReturn(false); + RegLocation rl_result = GetReturn(kRefReg); StoreValue(rl_dest, rl_result); } else { // We're don't need access checks, load type from dex 
cache int32_t dex_cache_offset = mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(); - Load32Disp(rl_method.reg, dex_cache_offset, res_reg); + LoadRefDisp(rl_method.reg, dex_cache_offset, res_reg); int32_t offset_of_type = ClassArray::OffsetOfElement(type_idx).Int32Value(); - Load32Disp(res_reg, offset_of_type, rl_result.reg); + LoadRefDisp(res_reg, offset_of_type, rl_result.reg); if (!cu_->compiler_driver->CanAssumeTypeIsPresentInDexCache(*cu_->dex_file, type_idx) || SLOW_TYPE_PATH) { // Slow path, at runtime test if type is null and if so initialize @@ -976,7 +983,7 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { TargetReg(kArg0)); // Might call out to helper, which will return resolved string in kRet0 - Load32Disp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0)); + LoadRefDisp(TargetReg(kArg0), offset_of_string, TargetReg(kRet0)); LIR* fromfast = OpCmpImmBranch(kCondEq, TargetReg(kRet0), 0, NULL); LIR* cont = NewLIR0(kPseudoTargetLabel); @@ -1010,13 +1017,13 @@ void Mir2Lir::GenConstString(uint32_t string_idx, RegLocation rl_dest) { } GenBarrier(); - StoreValue(rl_dest, GetReturn(false)); + StoreValue(rl_dest, GetReturn(kRefReg)); } else { RegLocation rl_method = LoadCurrMethod(); - RegStorage res_reg = AllocTemp(); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegStorage res_reg = AllocTempRef(); + RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); LoadRefDisp(rl_method.reg, mirror::ArtMethod::DexCacheStringsOffset().Int32Value(), res_reg); - Load32Disp(res_reg, offset_of_string, rl_result.reg); + LoadRefDisp(res_reg, offset_of_string, rl_result.reg); StoreValue(rl_dest, rl_result); } } @@ -1071,7 +1078,7 @@ static void GenNewInstanceImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, uint32_ func_offset = QUICK_ENTRYPOINT_OFFSET(pointer_size, pAllocObjectWithAccessCheck); mir_to_lir->CallRuntimeHelperImmMethod(func_offset, type_idx, true); } - RegLocation rl_result = mir_to_lir->GetReturn(false); + RegLocation rl_result = mir_to_lir->GetReturn(kRefReg); mir_to_lir->StoreValue(rl_dest, rl_result); } @@ -1103,7 +1110,7 @@ void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, Re // X86 has its own implementation. DCHECK(cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64); - RegLocation object = LoadValue(rl_src, kCoreReg); + RegLocation object = LoadValue(rl_src, kRefReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage result_reg = rl_result.reg; if (result_reg == object.reg) { @@ -1112,8 +1119,8 @@ void Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, Re LoadConstant(result_reg, 0); // assume false LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL); - RegStorage check_class = AllocTypedTemp(false, kCoreReg); - RegStorage object_class = AllocTypedTemp(false, kCoreReg); + RegStorage check_class = AllocTypedTemp(false, kRefReg); + RegStorage object_class = AllocTypedTemp(false, kRefReg); LoadCurrMethodDirect(check_class); if (use_declaring_class) { @@ -1206,7 +1213,7 @@ void Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_know } } /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result */ - RegLocation rl_result = GetReturn(false); + RegLocation rl_result = GetReturn(kRefReg); if (cu_->instruction_set == kMips) { // On MIPS rArg0 != rl_result, place false in result if branch is taken. 
LoadConstant(rl_result.reg, 0); @@ -1511,7 +1518,7 @@ void Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, } else { GenShiftOpLongCall<4>(this, opcode, rl_src1, rl_shift); } - RegLocation rl_result = GetReturnWide(false); + RegLocation rl_result = GetReturnWide(kCoreReg); StoreValueWide(rl_dest, rl_result); } @@ -1653,7 +1660,7 @@ void Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, CallHelper(r_tgt, QUICK_ENTRYPOINT_OFFSET(4, pIdivmod), false /* not a safepoint */); } if (op == kOpDiv) - rl_result = GetReturn(false); + rl_result = GetReturn(kCoreReg); else rl_result = GetReturnAlt(); } @@ -1918,7 +1925,7 @@ void Mir2Lir::GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, Re false); } if (is_div) - rl_result = GetReturn(false); + rl_result = GetReturn(kCoreReg); else rl_result = GetReturnAlt(); } @@ -2081,7 +2088,7 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc } // Adjust return regs in to handle case of rem returning kArg2/kArg3 if (ret_reg == mir_to_lir->TargetReg(kRet0).GetReg()) - rl_result = mir_to_lir->GetReturnWide(false); + rl_result = mir_to_lir->GetReturnWide(kCoreReg); else rl_result = mir_to_lir->GetReturnWideAlt(); mir_to_lir->StoreValueWide(rl_dest, rl_result); @@ -2119,11 +2126,11 @@ void Mir2Lir::GenConversionCall(ThreadOffset<pointer_size> func_offset, CallRuntimeHelperRegLocation(func_offset, rl_src, false); if (rl_dest.wide) { RegLocation rl_result; - rl_result = GetReturnWide(rl_dest.fp); + rl_result = GetReturnWide(LocToRegClass(rl_dest)); StoreValueWide(rl_dest, rl_result); } else { RegLocation rl_result; - rl_result = GetReturn(rl_dest.fp); + rl_result = GetReturn(LocToRegClass(rl_dest)); StoreValue(rl_dest, rl_result); } } diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 5ec1ca9d63..842533b66b 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -434,7 +434,7 @@ INSTANTIATE(void Mir2Lir::CallRuntimeHelperRegLocationRegLocationRegLocation, Re */ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { /* - * Dummy up a RegLocation for the incoming Method* + * Dummy up a RegLocation for the incoming StackReference<mirror::ArtMethod> * It will attempt to keep kArg0 live (or copy it to home location * if promoted). */ @@ -443,14 +443,10 @@ void Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { rl_src.reg = TargetReg(kArg0); rl_src.home = false; MarkLive(rl_src); - if (rl_method.wide) { - StoreValueWide(rl_method, rl_src); - } else { - StoreValue(rl_method, rl_src); - } + StoreValue(rl_method, rl_src); // If Method* has been promoted, explicitly flush if (rl_method.location == kLocPhysReg) { - StoreWordDisp(TargetReg(kSp), 0, TargetReg(kArg0)); + StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0)); } if (cu_->num_ins == 0) { @@ -864,8 +860,17 @@ int Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, // Wide spans, we need the 2nd half of uses[2]. rl_arg = UpdateLocWide(rl_use2); if (rl_arg.location == kLocPhysReg) { - // NOTE: not correct for 64-bit core regs, but this needs rewriting for hard-float. - reg = rl_arg.reg.IsPair() ? rl_arg.reg.GetHigh() : rl_arg.reg.DoubleToHighSingle(); + if (rl_arg.reg.IsPair()) { + reg = rl_arg.reg.GetHigh(); + } else { + RegisterInfo* info = GetRegInfo(rl_arg.reg); + info = info->FindMatchingView(RegisterInfo::kHighSingleStorageMask); + if (info == nullptr) { + // NOTE: For hard float convention we won't split arguments across reg/mem. 
+ UNIMPLEMENTED(FATAL) << "Needs hard float api."; + } + reg = info->GetReg(); + } } else { // kArg2 & rArg3 can safely be used here reg = TargetReg(kArg3); @@ -1151,7 +1156,7 @@ int Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, RegLocation Mir2Lir::InlineTarget(CallInfo* info) { RegLocation res; if (info->result.location == kLocInvalid) { - res = GetReturn(false); + res = GetReturn(LocToRegClass(info->result)); } else { res = info->result; } @@ -1161,7 +1166,7 @@ RegLocation Mir2Lir::InlineTarget(CallInfo* info) { RegLocation Mir2Lir::InlineTargetWide(CallInfo* info) { RegLocation res; if (info->result.location == kLocInvalid) { - res = GetReturnWide(false); + res = GetReturnWide(kCoreReg); } else { res = info->result; } @@ -1184,7 +1189,7 @@ bool Mir2Lir::GenInlinedCharAt(CallInfo* info) { RegLocation rl_obj = info->args[0]; RegLocation rl_idx = info->args[1]; - rl_obj = LoadValue(rl_obj, kCoreReg); + rl_obj = LoadValue(rl_obj, kRefReg); // X86 wants to avoid putting a constant index into a register. if (!((cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64)&& rl_idx.is_const)) { rl_idx = LoadValue(rl_idx, kCoreReg); @@ -1197,7 +1202,7 @@ bool Mir2Lir::GenInlinedCharAt(CallInfo* info) { RegStorage reg_ptr; if (cu_->instruction_set != kX86 && cu_->instruction_set != kX86_64) { reg_off = AllocTemp(); - reg_ptr = AllocTemp(); + reg_ptr = AllocTempRef(); if (range_check) { reg_max = AllocTemp(); Load32Disp(rl_obj.reg, count_offset, reg_max); @@ -1227,9 +1232,9 @@ bool Mir2Lir::GenInlinedCharAt(CallInfo* info) { } } reg_off = AllocTemp(); - reg_ptr = AllocTemp(); + reg_ptr = AllocTempRef(); Load32Disp(rl_obj.reg, offset_offset, reg_off); - Load32Disp(rl_obj.reg, value_offset, reg_ptr); + LoadRefDisp(rl_obj.reg, value_offset, reg_ptr); } if (rl_idx.is_const) { OpRegImm(kOpAdd, reg_off, mir_graph_->ConstantValue(rl_idx.orig_sreg)); @@ -1266,7 +1271,7 @@ bool Mir2Lir::GenInlinedStringIsEmptyOrLength(CallInfo* info, bool is_empty) { } // dst = src.length(); RegLocation rl_obj = info->args[0]; - rl_obj = LoadValue(rl_obj, kCoreReg); + rl_obj = LoadValue(rl_obj, kRefReg); RegLocation rl_dest = InlineTarget(info); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); GenNullCheck(rl_obj.reg, info->opt_flags); @@ -1472,7 +1477,7 @@ bool Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { DCHECK_EQ(mir_graph_->ConstantValue(rl_char) & ~0xFFFF, 0); DCHECK(high_code_point_branch == nullptr); } - RegLocation rl_return = GetReturn(false); + RegLocation rl_return = GetReturn(kCoreReg); RegLocation rl_dest = InlineTarget(info); StoreValue(rl_dest, rl_return); return true; @@ -1518,7 +1523,7 @@ bool Mir2Lir::GenInlinedStringCompareTo(CallInfo* info) { OpThreadMem(kOpBlx, QUICK_ENTRYPOINT_OFFSET(4, pStringCompareTo)); } } - RegLocation rl_return = GetReturn(false); + RegLocation rl_return = GetReturn(kCoreReg); RegLocation rl_dest = InlineTarget(info); StoreValue(rl_dest, rl_return); return true; @@ -1570,7 +1575,7 @@ bool Mir2Lir::GenInlinedUnsafeGet(CallInfo* info, rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3] RegLocation rl_dest = is_long ? 
InlineTargetWide(info) : InlineTarget(info); // result reg - RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); + RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); if (is_long) { @@ -1616,7 +1621,7 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, // There might have been a store before this volatile one so insert StoreStore barrier. GenMemBarrier(kStoreStore); } - RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); + RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg); RegLocation rl_value; if (is_long) { @@ -1630,7 +1635,7 @@ bool Mir2Lir::GenInlinedUnsafePut(CallInfo* info, bool is_long, FreeTemp(rl_temp_offset); } } else { - rl_value = LoadValue(rl_src_value, kCoreReg); + rl_value = LoadValue(rl_src_value); StoreBaseIndexed(rl_object.reg, rl_offset.reg, rl_value.reg, 0, k32); } @@ -1653,7 +1658,7 @@ void Mir2Lir::GenInvoke(CallInfo* info) { if (info->type != kStatic && ((cu_->disable_opt & (1 << kNullCheckElimination)) != 0 || (info->opt_flags & MIR_IGNORE_NULL_CHECK) == 0)) { - RegLocation rl_obj = LoadValue(info->args[0], kCoreReg); + RegLocation rl_obj = LoadValue(info->args[0], kRefReg); GenNullCheck(rl_obj.reg); } return; @@ -1778,10 +1783,10 @@ void Mir2Lir::GenInvokeNoInline(CallInfo* info) { if (info->result.location != kLocInvalid) { // We have a following MOVE_RESULT - do it now. if (info->result.wide) { - RegLocation ret_loc = GetReturnWide(info->result.fp); + RegLocation ret_loc = GetReturnWide(LocToRegClass(info->result)); StoreValueWide(info->result, ret_loc); } else { - RegLocation ret_loc = GetReturn(info->result.fp); + RegLocation ret_loc = GetReturn(LocToRegClass(info->result)); StoreValue(info->result, ret_loc); } } diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index f5e7e635de..2c8b9b9adf 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -139,6 +139,7 @@ void Mir2Lir::LoadValueDirectWideFixed(RegLocation rl_src, RegStorage r_dest) { } RegLocation Mir2Lir::LoadValue(RegLocation rl_src, RegisterClass op_kind) { + DCHECK(!rl_src.ref || op_kind == kRefReg); rl_src = UpdateLoc(rl_src); if (rl_src.location == kLocPhysReg) { if (!RegClassMatches(op_kind, rl_src.reg)) { @@ -162,6 +163,10 @@ RegLocation Mir2Lir::LoadValue(RegLocation rl_src, RegisterClass op_kind) { return rl_src; } +RegLocation Mir2Lir::LoadValue(RegLocation rl_src) { + return LoadValue(rl_src, LocToRegClass(rl_src)); +} + void Mir2Lir::StoreValue(RegLocation rl_dest, RegLocation rl_src) { /* * Sanity checking - should never try to store to the same @@ -366,7 +371,7 @@ void Mir2Lir::LoadCurrMethodDirect(RegStorage r_tgt) { } RegLocation Mir2Lir::LoadCurrMethod() { - return LoadValue(mir_graph_->GetMethodLoc(), kCoreReg); + return LoadValue(mir_graph_->GetMethodLoc(), kRefReg); } RegLocation Mir2Lir::ForceTemp(RegLocation loc) { diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc index 3af3715f47..e1bdb2e9b6 100644 --- a/compiler/dex/quick/mips/call_mips.cc +++ b/compiler/dex/quick/mips/call_mips.cc @@ -261,11 +261,11 @@ void MipsMir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) { void MipsMir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Thread::ExceptionOffset<4>().Int32Value(); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); - 
RegStorage reset_reg = AllocTemp(); - Load32Disp(rs_rMIPS_SELF, ex_offset, rl_result.reg); + RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); + RegStorage reset_reg = AllocTempRef(); + LoadRefDisp(rs_rMIPS_SELF, ex_offset, rl_result.reg); LoadConstant(reset_reg, 0); - Store32Disp(rs_rMIPS_SELF, ex_offset, reset_reg); + StoreRefDisp(rs_rMIPS_SELF, ex_offset, reset_reg); FreeTemp(reset_reg); StoreValue(rl_dest, rl_result); } diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h index e46217337b..ea3c901fa6 100644 --- a/compiler/dex/quick/mips/codegen_mips.h +++ b/compiler/dex/quick/mips/codegen_mips.h @@ -59,6 +59,7 @@ class MipsMir2Lir FINAL : public Mir2Lir { RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); + RegLocation LocCReturnRef(); RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc index 9fffb2fd1d..4e31477189 100644 --- a/compiler/dex/quick/mips/fp_mips.cc +++ b/compiler/dex/quick/mips/fp_mips.cc @@ -52,7 +52,7 @@ void MipsMir2Lir::GenArithOpFloat(Instruction::Code opcode, FlushAllRegs(); // Send everything to home location CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2, false); - rl_result = GetReturn(true); + rl_result = GetReturn(kFPReg); StoreValue(rl_dest, rl_result); return; case Instruction::NEG_FLOAT: @@ -95,7 +95,7 @@ void MipsMir2Lir::GenArithOpDouble(Instruction::Code opcode, FlushAllRegs(); // Send everything to home location CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2, false); - rl_result = GetReturnWide(true); + rl_result = GetReturnWide(kFPReg); StoreValueWide(rl_dest, rl_result); return; case Instruction::NEG_DOUBLE: @@ -204,7 +204,7 @@ void MipsMir2Lir::GenCmpFP(Instruction::Code opcode, RegLocation rl_dest, RegStorage r_tgt = LoadHelper(offset); // NOTE: not a safepoint OpReg(kOpBlx, r_tgt); - RegLocation rl_result = GetReturn(false); + RegLocation rl_result = GetReturn(kCoreReg); StoreValue(rl_dest, rl_result); } diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index 55cf4344f1..c1a7c990f0 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -26,46 +26,43 @@ namespace art { -static const RegStorage core_regs_arr[] = +static constexpr RegStorage core_regs_arr[] = {rs_rZERO, rs_rAT, rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rT0, rs_rT1, rs_rT2, rs_rT3, rs_rT4, rs_rT5, rs_rT6, rs_rT7, rs_rS0, rs_rS1, rs_rS2, rs_rS3, rs_rS4, rs_rS5, rs_rS6, rs_rS7, rs_rT8, rs_rT9, rs_rK0, rs_rK1, rs_rGP, rs_rSP, rs_rFP, rs_rRA}; -static RegStorage sp_regs_arr[] = +static constexpr RegStorage sp_regs_arr[] = {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10, rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15}; -static RegStorage dp_regs_arr[] = +static constexpr RegStorage dp_regs_arr[] = {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7}; -static const RegStorage reserved_regs_arr[] = +static constexpr RegStorage reserved_regs_arr[] = {rs_rZERO, rs_rAT, rs_rS0, rs_rS1, rs_rK0, rs_rK1, rs_rGP, rs_rSP, rs_rRA}; -static RegStorage core_temps_arr[] = +static constexpr RegStorage core_temps_arr[] = {rs_rV0, rs_rV1, rs_rA0, rs_rA1, rs_rA2, rs_rA3, rs_rT0, rs_rT1, rs_rT2, rs_rT3, rs_rT4, rs_rT5, rs_rT6, rs_rT7, rs_rT8}; -static RegStorage 
sp_temps_arr[] = +static constexpr RegStorage sp_temps_arr[] = {rs_rF0, rs_rF1, rs_rF2, rs_rF3, rs_rF4, rs_rF5, rs_rF6, rs_rF7, rs_rF8, rs_rF9, rs_rF10, rs_rF11, rs_rF12, rs_rF13, rs_rF14, rs_rF15}; -static RegStorage dp_temps_arr[] = +static constexpr RegStorage dp_temps_arr[] = {rs_rD0, rs_rD1, rs_rD2, rs_rD3, rs_rD4, rs_rD5, rs_rD6, rs_rD7}; -static const std::vector<RegStorage> empty_pool; -static const std::vector<RegStorage> core_regs(core_regs_arr, - core_regs_arr + sizeof(core_regs_arr) / sizeof(core_regs_arr[0])); -static const std::vector<RegStorage> sp_regs(sp_regs_arr, - sp_regs_arr + sizeof(sp_regs_arr) / sizeof(sp_regs_arr[0])); -static const std::vector<RegStorage> dp_regs(dp_regs_arr, - dp_regs_arr + sizeof(dp_regs_arr) / sizeof(dp_regs_arr[0])); -static const std::vector<RegStorage> reserved_regs(reserved_regs_arr, - reserved_regs_arr + sizeof(reserved_regs_arr) / sizeof(reserved_regs_arr[0])); -static const std::vector<RegStorage> core_temps(core_temps_arr, - core_temps_arr + sizeof(core_temps_arr) / sizeof(core_temps_arr[0])); -static const std::vector<RegStorage> sp_temps(sp_temps_arr, - sp_temps_arr + sizeof(sp_temps_arr) / sizeof(sp_temps_arr[0])); -static const std::vector<RegStorage> dp_temps(dp_temps_arr, - dp_temps_arr + sizeof(dp_temps_arr) / sizeof(dp_temps_arr[0])); +static constexpr ArrayRef<const RegStorage> empty_pool; +static constexpr ArrayRef<const RegStorage> core_regs(core_regs_arr); +static constexpr ArrayRef<const RegStorage> sp_regs(sp_regs_arr); +static constexpr ArrayRef<const RegStorage> dp_regs(dp_regs_arr); +static constexpr ArrayRef<const RegStorage> reserved_regs(reserved_regs_arr); +static constexpr ArrayRef<const RegStorage> core_temps(core_temps_arr); +static constexpr ArrayRef<const RegStorage> sp_temps(sp_temps_arr); +static constexpr ArrayRef<const RegStorage> dp_temps(dp_temps_arr); RegLocation MipsMir2Lir::LocCReturn() { return mips_loc_c_return; } +RegLocation MipsMir2Lir::LocCReturnRef() { + return mips_loc_c_return; +} + RegLocation MipsMir2Lir::LocCReturnWide() { return mips_loc_c_return_wide; } diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 9fc93d0a1a..9621995b43 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -23,6 +23,36 @@ namespace art { +RegisterClass Mir2Lir::ShortyToRegClass(char shorty_type) { + RegisterClass res; + switch (shorty_type) { + case 'L': + res = kRefReg; + break; + case 'F': + // Expected fallthrough. + case 'D': + res = kFPReg; + break; + default: + res = kCoreReg; + } + return res; +} + +RegisterClass Mir2Lir::LocToRegClass(RegLocation loc) { + RegisterClass res; + if (loc.fp) { + DCHECK(!loc.ref) << "At most, one of ref/fp may be set"; + res = kFPReg; + } else if (loc.ref) { + res = kRefReg; + } else { + res = kCoreReg; + } + return res; +} + void Mir2Lir::LockArg(int in_position, bool wide) { RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) : @@ -149,15 +179,13 @@ bool Mir2Lir::GenSpecialIGet(MIR* mir, const InlineMethod& special) { return false; } - // The inliner doesn't distinguish kDouble or kFloat, use shorty. - bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D'; - // Point of no return - no aborts after this GenPrintLabel(mir); LockArg(data.object_arg); - RegStorage reg_obj = LoadArg(data.object_arg, kCoreReg); - RegLocation rl_dest = wide ? 
GetReturnWide(double_or_float) : GetReturn(double_or_float); + RegStorage reg_obj = LoadArg(data.object_arg, kRefReg); RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile); + RegisterClass ret_reg_class = ShortyToRegClass(cu_->shorty[0]); + RegLocation rl_dest = wide ? GetReturnWide(ret_reg_class) : GetReturn(ret_reg_class); RegStorage r_result = rl_dest.reg; if (!RegClassMatches(reg_class, r_result)) { r_result = wide ? AllocTypedTempWide(rl_dest.fp, reg_class) @@ -205,7 +233,7 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) { GenPrintLabel(mir); LockArg(data.object_arg); LockArg(data.src_arg, wide); - RegStorage reg_obj = LoadArg(data.object_arg, kCoreReg); + RegStorage reg_obj = LoadArg(data.object_arg, kRefReg); RegisterClass reg_class = RegClassForFieldLoadStore(size, data.is_volatile); RegStorage reg_src = LoadArg(data.src_arg, reg_class, wide); if (data.is_volatile) { @@ -226,13 +254,12 @@ bool Mir2Lir::GenSpecialIPut(MIR* mir, const InlineMethod& special) { bool Mir2Lir::GenSpecialIdentity(MIR* mir, const InlineMethod& special) { const InlineReturnArgData& data = special.d.return_data; bool wide = (data.is_wide != 0u); - // The inliner doesn't distinguish kDouble or kFloat, use shorty. - bool double_or_float = cu_->shorty[0] == 'F' || cu_->shorty[0] == 'D'; // Point of no return - no aborts after this GenPrintLabel(mir); LockArg(data.arg, wide); - RegLocation rl_dest = wide ? GetReturnWide(double_or_float) : GetReturn(double_or_float); + RegisterClass reg_class = ShortyToRegClass(cu_->shorty[0]); + RegLocation rl_dest = wide ? GetReturnWide(reg_class) : GetReturn(reg_class); LoadArgDirect(data.arg, rl_dest); return true; } @@ -254,7 +281,7 @@ bool Mir2Lir::GenSpecialCase(BasicBlock* bb, MIR* mir, const InlineMethod& speci break; case kInlineOpNonWideConst: { successful = true; - RegLocation rl_dest = GetReturn(cu_->shorty[0] == 'F'); + RegLocation rl_dest = GetReturn(ShortyToRegClass(cu_->shorty[0])); GenPrintLabel(mir); LoadConstant(rl_dest.reg, static_cast<int>(special.d.data)); return_mir = bb->GetNextUnconditionalMir(mir_graph_, mir); @@ -377,26 +404,30 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list } break; - case Instruction::RETURN: case Instruction::RETURN_OBJECT: + DCHECK(rl_src[0].ref); + // Intentional fallthrough. + case Instruction::RETURN: if (!mir_graph_->MethodIsLeaf()) { GenSuspendTest(opt_flags); } - StoreValue(GetReturn(cu_->shorty[0] == 'F'), rl_src[0]); + DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0])); + StoreValue(GetReturn(LocToRegClass(rl_src[0])), rl_src[0]); break; case Instruction::RETURN_WIDE: if (!mir_graph_->MethodIsLeaf()) { GenSuspendTest(opt_flags); } - StoreValueWide(GetReturnWide(cu_->shorty[0] == 'D'), rl_src[0]); + DCHECK_EQ(LocToRegClass(rl_src[0]), ShortyToRegClass(cu_->shorty[0])); + StoreValueWide(GetReturnWide(LocToRegClass(rl_src[0])), rl_src[0]); break; case Instruction::MOVE_RESULT_WIDE: if ((opt_flags & MIR_INLINED) != 0) { break; // Nop - combined w/ previous invoke. } - StoreValueWide(rl_dest, GetReturnWide(rl_dest.fp)); + StoreValueWide(rl_dest, GetReturnWide(LocToRegClass(rl_dest))); break; case Instruction::MOVE_RESULT: @@ -404,7 +435,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list if ((opt_flags & MIR_INLINED) != 0) { break; // Nop - combined w/ previous invoke. 
} - StoreValue(rl_dest, GetReturn(rl_dest.fp)); + StoreValue(rl_dest, GetReturn(LocToRegClass(rl_dest))); break; case Instruction::MOVE: @@ -474,7 +505,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::ARRAY_LENGTH: int len_offset; len_offset = mirror::Array::LengthOffset().Int32Value(); - rl_src[0] = LoadValue(rl_src[0], kCoreReg); + rl_src[0] = LoadValue(rl_src[0], kRefReg); GenNullCheck(rl_src[0].reg, opt_flags); rl_result = EvalLoc(rl_dest, kCoreReg, true); Load32Disp(rl_src[0].reg, len_offset, rl_result.reg); @@ -782,7 +813,7 @@ void Mir2Lir::CompileDalvikInstruction(MIR* mir, BasicBlock* bb, LIR* label_list case Instruction::LONG_TO_INT: rl_src[0] = UpdateLocWide(rl_src[0]); - rl_src[0] = WideToNarrow(rl_src[0]); + rl_src[0] = NarrowRegLoc(rl_src[0]); StoreValue(rl_dest, rl_src[0]); break; @@ -1069,7 +1100,7 @@ bool Mir2Lir::MethodBlockCodeGen(BasicBlock* bb) { work_half->meta.throw_insn = mir; } - if (opcode >= kMirOpFirst) { + if (MIRGraph::IsPseudoMirOp(opcode)) { HandleExtendedMethodMIR(bb, mir); continue; } diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index f58f078711..ed94a8d844 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -26,6 +26,7 @@ #include "driver/compiler_driver.h" #include "leb128.h" #include "safe_map.h" +#include "utils/array_ref.h" #include "utils/arena_allocator.h" #include "utils/growable_array.h" @@ -332,6 +333,15 @@ class Mir2Lir : public Backend { return arena->Alloc(size, kArenaAllocRegAlloc); } + static const uint32_t k32SoloStorageMask = 0x00000001; + static const uint32_t kLowSingleStorageMask = 0x00000001; + static const uint32_t kHighSingleStorageMask = 0x00000002; + static const uint32_t k64SoloStorageMask = 0x00000003; + static const uint32_t k128SoloStorageMask = 0x0000000f; + static const uint32_t k256SoloStorageMask = 0x000000ff; + static const uint32_t k512SoloStorageMask = 0x0000ffff; + static const uint32_t k1024SoloStorageMask = 0xffffffff; + bool InUse() { return (storage_mask_ & master_->used_storage_) != 0; } void MarkInUse() { master_->used_storage_ |= storage_mask_; } void MarkFree() { master_->used_storage_ &= ~storage_mask_; } @@ -389,7 +399,15 @@ class Mir2Lir : public Backend { LIR* DefEnd() { return def_end_; } void SetDefEnd(LIR* def_end) { def_end_ = def_end; } void ResetDefBody() { def_start_ = def_end_ = nullptr; } - + // Find member of aliased set matching storage_used; return nullptr if none. 
+ RegisterInfo* FindMatchingView(uint32_t storage_used) { + RegisterInfo* res = Master(); + for (; res != nullptr; res = res->GetAliasChain()) { + if (res->StorageMask() == storage_used) + break; + } + return res; + } private: RegStorage reg_; @@ -412,16 +430,16 @@ class Mir2Lir : public Backend { class RegisterPool { public: RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, - const std::vector<RegStorage>& core_regs, - const std::vector<RegStorage>& core64_regs, - const std::vector<RegStorage>& sp_regs, - const std::vector<RegStorage>& dp_regs, - const std::vector<RegStorage>& reserved_regs, - const std::vector<RegStorage>& reserved64_regs, - const std::vector<RegStorage>& core_temps, - const std::vector<RegStorage>& core64_temps, - const std::vector<RegStorage>& sp_temps, - const std::vector<RegStorage>& dp_temps); + const ArrayRef<const RegStorage>& core_regs, + const ArrayRef<const RegStorage>& core64_regs, + const ArrayRef<const RegStorage>& sp_regs, + const ArrayRef<const RegStorage>& dp_regs, + const ArrayRef<const RegStorage>& reserved_regs, + const ArrayRef<const RegStorage>& reserved64_regs, + const ArrayRef<const RegStorage>& core_temps, + const ArrayRef<const RegStorage>& core64_temps, + const ArrayRef<const RegStorage>& sp_temps, + const ArrayRef<const RegStorage>& dp_temps); ~RegisterPool() {} static void* operator new(size_t size, ArenaAllocator* arena) { return arena->Alloc(size, kArenaAllocRegAlloc); @@ -439,6 +457,8 @@ class Mir2Lir : public Backend { int next_sp_reg_; GrowableArray<RegisterInfo*> dp_regs_; // Double precision float. int next_dp_reg_; + GrowableArray<RegisterInfo*>* ref_regs_; // Points to core_regs_ or core64_regs_ + int* next_ref_reg_; private: Mir2Lir* const m2l_; @@ -533,8 +553,12 @@ class Mir2Lir : public Backend { * just use our knowledge of type to select the most appropriate register class? */ RegisterClass RegClassBySize(OpSize size) { - return (size == kUnsignedHalf || size == kSignedHalf || size == kUnsignedByte || - size == kSignedByte) ? kCoreReg : kAnyReg; + if (size == kReference) { + return kRefReg; + } else { + return (size == kUnsignedHalf || size == kSignedHalf || size == kUnsignedByte || + size == kSignedByte) ? kCoreReg : kAnyReg; + } } size_t CodeBufferSizeInBytes() { @@ -595,6 +619,8 @@ class Mir2Lir : public Backend { return current_dalvik_offset_; } + RegisterClass ShortyToRegClass(char shorty_type); + RegisterClass LocToRegClass(RegLocation loc); int ComputeFrameSize(); virtual void Materialize(); virtual CompiledMethod* GetCompiledMethod(); @@ -648,7 +674,7 @@ class Mir2Lir : public Backend { virtual void EndInvoke(CallInfo* info) {} - // Handle bookkeeping to convert a wide RegLocation to a narow RegLocation. No code generated. + // Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation. No code generated. 
RegLocation NarrowRegLoc(RegLocation loc); // Shared by all targets - implemented in local_optimizations.cc @@ -682,6 +708,7 @@ class Mir2Lir : public Backend { virtual RegStorage AllocFreeTemp(); virtual RegStorage AllocTemp(); virtual RegStorage AllocTempWide(); + virtual RegStorage AllocTempRef(); virtual RegStorage AllocTempSingle(); virtual RegStorage AllocTempDouble(); virtual RegStorage AllocTypedTemp(bool fp_hint, int reg_class); @@ -701,7 +728,6 @@ class Mir2Lir : public Backend { void NullifyRange(RegStorage reg, int s_reg); void MarkDef(RegLocation rl, LIR *start, LIR *finish); void MarkDefWide(RegLocation rl, LIR *start, LIR *finish); - virtual RegLocation WideToNarrow(RegLocation rl); void ResetDefLoc(RegLocation rl); void ResetDefLocWide(RegLocation rl); void ResetDefTracking(); @@ -746,8 +772,8 @@ class Mir2Lir : public Backend { void DoPromotion(); int VRegOffset(int v_reg); int SRegOffset(int s_reg); - RegLocation GetReturnWide(bool is_double); - RegLocation GetReturn(bool is_float); + RegLocation GetReturnWide(RegisterClass reg_class); + RegLocation GetReturn(RegisterClass reg_class); RegisterInfo* GetRegInfo(RegStorage reg); // Shared by all targets - implemented in gen_common.cc. @@ -955,6 +981,8 @@ class Mir2Lir : public Backend { } // Load Dalvik value with 32-bit memory storage. If compressed object reference, decompress. virtual RegLocation LoadValue(RegLocation rl_src, RegisterClass op_kind); + // Same as above, but derive the target register class from the location record. + virtual RegLocation LoadValue(RegLocation rl_src); // Load Dalvik value with 64-bit memory storage. virtual RegLocation LoadValueWide(RegLocation rl_src, RegisterClass op_kind); // Load Dalvik value with 32-bit memory storage. If compressed object reference, decompress. 
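The hunks above, together with the mir_to_lir.cc change earlier in this patch, replace the old boolean selectors (is_float / is_double / rl_dest.fp) with an explicit RegisterClass derived from either the method shorty or the RegLocation flags. As a rough standalone sketch of that mapping only, using simplified stand-in types rather than ART's real Mir2Lir declarations:

// Simplified stand-ins; not ART's actual headers.
enum RegisterClass { kCoreReg, kFPReg, kRefReg };
struct RegLocation { bool fp; bool ref; };  // only the flags the mapping inspects

// Mirrors the Mir2Lir::ShortyToRegClass hunk: 'L' -> reference, 'F'/'D' -> FP, else core.
RegisterClass ShortyToRegClass(char shorty_type) {
  switch (shorty_type) {
    case 'L': return kRefReg;
    case 'F':
    case 'D': return kFPReg;
    default:  return kCoreReg;
  }
}

// Mirrors Mir2Lir::LocToRegClass: fp and ref are mutually exclusive on a location.
RegisterClass LocToRegClass(RegLocation loc) {
  if (loc.fp) return kFPReg;
  if (loc.ref) return kRefReg;
  return kCoreReg;
}

Callers such as GetReturn(LocToRegClass(rl_dest)) then select LocCReturnRef(), LocCReturnFloat(), or LocCReturn() (see the ralloc_util.cc hunk below) instead of branching on a single is_float flag.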
@@ -1104,6 +1132,7 @@ class Mir2Lir : public Backend { virtual RegLocation GetReturnAlt() = 0; virtual RegLocation GetReturnWideAlt() = 0; virtual RegLocation LocCReturn() = 0; + virtual RegLocation LocCReturnRef() = 0; virtual RegLocation LocCReturnDouble() = 0; virtual RegLocation LocCReturnFloat() = 0; virtual RegLocation LocCReturnWide() = 0; diff --git a/compiler/dex/quick/ralloc_util.cc b/compiler/dex/quick/ralloc_util.cc index e5ca460e88..bbeef50d73 100644 --- a/compiler/dex/quick/ralloc_util.cc +++ b/compiler/dex/quick/ralloc_util.cc @@ -56,16 +56,16 @@ Mir2Lir::RegisterInfo::RegisterInfo(RegStorage r, uint64_t mask) } Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, - const std::vector<RegStorage>& core_regs, - const std::vector<RegStorage>& core64_regs, - const std::vector<RegStorage>& sp_regs, - const std::vector<RegStorage>& dp_regs, - const std::vector<RegStorage>& reserved_regs, - const std::vector<RegStorage>& reserved64_regs, - const std::vector<RegStorage>& core_temps, - const std::vector<RegStorage>& core64_temps, - const std::vector<RegStorage>& sp_temps, - const std::vector<RegStorage>& dp_temps) : + const ArrayRef<const RegStorage>& core_regs, + const ArrayRef<const RegStorage>& core64_regs, + const ArrayRef<const RegStorage>& sp_regs, + const ArrayRef<const RegStorage>& dp_regs, + const ArrayRef<const RegStorage>& reserved_regs, + const ArrayRef<const RegStorage>& reserved64_regs, + const ArrayRef<const RegStorage>& core_temps, + const ArrayRef<const RegStorage>& core64_temps, + const ArrayRef<const RegStorage>& sp_temps, + const ArrayRef<const RegStorage>& dp_temps) : core_regs_(arena, core_regs.size()), next_core_reg_(0), core64_regs_(arena, core64_regs.size()), next_core64_reg_(0), sp_regs_(arena, sp_regs.size()), next_sp_reg_(0), @@ -128,6 +128,15 @@ Mir2Lir::RegisterPool::RegisterPool(Mir2Lir* m2l, ArenaAllocator* arena, // Add an entry for InvalidReg with zero'd mask. RegisterInfo* invalid_reg = new (arena) RegisterInfo(RegStorage::InvalidReg(), 0); m2l_->reginfo_map_.Put(RegStorage::InvalidReg().GetReg(), invalid_reg); + + // Existence of core64 registers implies wide references. + if (core64_regs_.Size() != 0) { + ref_regs_ = &core64_regs_; + next_ref_reg_ = &next_core64_reg_; + } else { + ref_regs_ = &core_regs_; + next_ref_reg_ = &next_core_reg_; + } } void Mir2Lir::DumpRegPool(GrowableArray<RegisterInfo*>* regs) { @@ -145,6 +154,7 @@ void Mir2Lir::DumpRegPool(GrowableArray<RegisterInfo*>* regs) { void Mir2Lir::DumpCoreRegPool() { DumpRegPool(®_pool_->core_regs_); + DumpRegPool(®_pool_->core64_regs_); } void Mir2Lir::DumpFpRegPool() { @@ -274,6 +284,7 @@ void Mir2Lir::RecordCorePromotion(RegStorage reg, int s_reg) { /* Reserve a callee-save register. 
Return InvalidReg if none available */ RegStorage Mir2Lir::AllocPreservedCoreReg(int s_reg) { + // TODO: 64-bit and refreg update RegStorage res; GrowableArray<RegisterInfo*>::Iterator it(®_pool_->core_regs_); for (RegisterInfo* info = it.Next(); info != nullptr; info = it.Next()) { @@ -406,6 +417,12 @@ RegStorage Mir2Lir::AllocTempWide() { return res; } +RegStorage Mir2Lir::AllocTempRef() { + RegStorage res = AllocTempBody(*reg_pool_->ref_regs_, reg_pool_->next_ref_reg_, true); + DCHECK(!res.IsPair()); + return res; +} + RegStorage Mir2Lir::AllocTempSingle() { RegStorage res = AllocTempBody(reg_pool_->sp_regs_, ®_pool_->next_sp_reg_, true); DCHECK(res.IsSingle()) << "Reg: 0x" << std::hex << res.GetRawBits(); @@ -419,6 +436,7 @@ RegStorage Mir2Lir::AllocTempDouble() { } RegStorage Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) { + DCHECK_NE(reg_class, kRefReg); // NOTE: the Dalvik width of a reference is always 32 bits. if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) { return AllocTempDouble(); } @@ -428,6 +446,8 @@ RegStorage Mir2Lir::AllocTypedTempWide(bool fp_hint, int reg_class) { RegStorage Mir2Lir::AllocTypedTemp(bool fp_hint, int reg_class) { if (((reg_class == kAnyReg) && fp_hint) || (reg_class == kFPReg)) { return AllocTempSingle(); + } else if (reg_class == kRefReg) { + return AllocTempRef(); } return AllocTemp(); } @@ -446,8 +466,10 @@ RegStorage Mir2Lir::FindLiveReg(GrowableArray<RegisterInfo*> ®s, int s_reg) { RegStorage Mir2Lir::AllocLiveReg(int s_reg, int reg_class, bool wide) { RegStorage reg; - // TODO: might be worth a sanity check here to verify at most 1 live reg per s_reg. - if ((reg_class == kAnyReg) || (reg_class == kFPReg)) { + if (reg_class == kRefReg) { + reg = FindLiveReg(*reg_pool_->ref_regs_, s_reg); + } + if (!reg.Valid() && ((reg_class == kAnyReg) || (reg_class == kFPReg))) { reg = FindLiveReg(wide ? reg_pool_->dp_regs_ : reg_pool_->sp_regs_, s_reg); } if (!reg.Valid() && (reg_class != kFPReg)) { @@ -662,39 +684,6 @@ void Mir2Lir::MarkDefWide(RegLocation rl, LIR *start, LIR *finish) { p->SetDefEnd(finish); } -RegLocation Mir2Lir::WideToNarrow(RegLocation rl) { - DCHECK(rl.wide); - if (rl.location == kLocPhysReg) { - if (rl.reg.IsPair()) { - RegisterInfo* info_lo = GetRegInfo(rl.reg.GetLow()); - RegisterInfo* info_hi = GetRegInfo(rl.reg.GetHigh()); - if (info_lo->IsTemp()) { - info_lo->SetIsWide(false); - info_lo->ResetDefBody(); - } - if (info_hi->IsTemp()) { - info_hi->SetIsWide(false); - info_hi->ResetDefBody(); - } - rl.reg = rl.reg.GetLow(); - } else { - /* - * TODO: If not a pair, we can't just drop the high register. On some targets, we may be - * able to re-cast the 64-bit register as 32 bits, so it might be worthwhile to revisit - * this code. Will probably want to make this a virtual function. - */ - // Can't narrow 64-bit register. Clobber. 
- if (GetRegInfo(rl.reg)->IsTemp()) { - Clobber(rl.reg); - FreeTemp(rl.reg); - } - rl.location = kLocDalvikFrame; - } - } - rl.wide = false; - return rl; -} - void Mir2Lir::ResetDefLoc(RegLocation rl) { DCHECK(!rl.wide); if (IsTemp(rl.reg) && !(cu_->disable_opt & (1 << kSuppressLoads))) { @@ -714,16 +703,8 @@ void Mir2Lir::ResetDefLocWide(RegLocation rl) { } void Mir2Lir::ResetDefTracking() { - GrowableArray<RegisterInfo*>::Iterator core_it(®_pool_->core_regs_); - for (RegisterInfo* info = core_it.Next(); info != nullptr; info = core_it.Next()) { - info->ResetDefBody(); - } - GrowableArray<RegisterInfo*>::Iterator sp_it(®_pool_->core_regs_); - for (RegisterInfo* info = sp_it.Next(); info != nullptr; info = sp_it.Next()) { - info->ResetDefBody(); - } - GrowableArray<RegisterInfo*>::Iterator dp_it(®_pool_->core_regs_); - for (RegisterInfo* info = dp_it.Next(); info != nullptr; info = dp_it.Next()) { + GrowableArray<RegisterInfo*>::Iterator iter(&tempreg_info_); + for (RegisterInfo* info = iter.Next(); info != NULL; info = iter.Next()) { info->ResetDefBody(); } } @@ -798,7 +779,11 @@ void Mir2Lir::FlushAllRegs() { bool Mir2Lir::RegClassMatches(int reg_class, RegStorage reg) { if (reg_class == kAnyReg) { return true; - } else if (reg_class == kCoreReg) { + } else if ((reg_class == kCoreReg) || (reg_class == kRefReg)) { + /* + * For this purpose, consider Core and Ref to be the same class. We aren't dealing + * with width here - that should be checked at a higher level (if needed). + */ return !reg.IsFloat(); } else { return reg.IsFloat(); @@ -1334,20 +1319,26 @@ int Mir2Lir::SRegOffset(int s_reg) { } /* Mark register usage state and return long retloc */ -RegLocation Mir2Lir::GetReturnWide(bool is_double) { - RegLocation gpr_res = LocCReturnWide(); - RegLocation fpr_res = LocCReturnDouble(); - RegLocation res = is_double ? fpr_res : gpr_res; +RegLocation Mir2Lir::GetReturnWide(RegisterClass reg_class) { + RegLocation res; + switch (reg_class) { + case kRefReg: LOG(FATAL); break; + case kFPReg: res = LocCReturnDouble(); break; + default: res = LocCReturnWide(); break; + } Clobber(res.reg); LockTemp(res.reg); MarkWide(res.reg); return res; } -RegLocation Mir2Lir::GetReturn(bool is_float) { - RegLocation gpr_res = LocCReturn(); - RegLocation fpr_res = LocCReturnFloat(); - RegLocation res = is_float ? 
fpr_res : gpr_res; +RegLocation Mir2Lir::GetReturn(RegisterClass reg_class) { + RegLocation res; + switch (reg_class) { + case kRefReg: res = LocCReturnRef(); break; + case kFPReg: res = LocCReturnFloat(); break; + default: res = LocCReturn(); break; + } Clobber(res.reg); if (cu_->instruction_set == kMips) { MarkInUse(res.reg); diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 91a66d38e0..39a036560e 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -63,27 +63,36 @@ const X86EncodingMap X86Mir2Lir::EncodingMap[kX86Last] = { { kX86 ## opname ## 16TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0x66, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "16TI8", "fs:[!0d],!1d" }, \ \ { kX86 ## opname ## 32MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "32MR", "[!0r+!1d],!2r" }, \ -{ kX86 ## opname ## 64MR, kMemReg64, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "64MR", "[!0r+!1d],!2r" }, \ { kX86 ## opname ## 32AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "32AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ -{ kX86 ## opname ## 64AR, kArrayReg64, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ { kX86 ## opname ## 32TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "32TR", "fs:[!0d],!1r" }, \ { kX86 ## opname ## 32RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RR", "!0r,!1r" }, \ { kX86 ## opname ## 32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RM", "!0r,[!1r+!2d]" }, \ -{ kX86 ## opname ## 64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RM", "!0r,[!1r+!2d]" }, \ { kX86 ## opname ## 32RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { 0, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ -{ kX86 ## opname ## 64RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ { kX86 ## opname ## 32RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "32RT", "!0r,fs:[!1d]" }, \ -{ kX86 ## opname ## 64RT, kReg64Thread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RT", "!0r,fs:[!1d]" }, \ { kX86 ## opname ## 32RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "32RI", "!0r,!1d" }, \ -{ kX86 ## opname ## 64RI, kReg64Imm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "32RI", "!0r,!1d" }, \ { kX86 ## opname ## 32MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, 
rm32_i32_modrm, 0, 4 }, #opname "32MI", "[!0r+!1d],!2d" }, \ { kX86 ## opname ## 32AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ { kX86 ## opname ## 32TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "32TI", "fs:[!0d],!1d" }, \ { kX86 ## opname ## 32RI8, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32RI8", "!0r,!1d" }, \ -{ kX86 ## opname ## 64RI8, kReg64Imm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64RI8", "!0r,!1d" }, \ { kX86 ## opname ## 32MI8, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32MI8", "[!0r+!1d],!2d" }, \ { kX86 ## opname ## 32AI8, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { 0, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \ -{ kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32TI8", "fs:[!0d],!1d" } +{ kX86 ## opname ## 32TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "32TI8", "fs:[!0d],!1d" }, \ + \ +{ kX86 ## opname ## 64MR, kMemReg, mem_use | IS_TERTIARY_OP | REG_USE02 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "64MR", "[!0r+!1d],!2r" }, \ +{ kX86 ## opname ## 64AR, kArrayReg, mem_use | IS_QUIN_OP | REG_USE014 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_r32, 0, 0, 0, 0, 0 }, #opname "64AR", "[!0r+!1r<<!2d+!3d],!4r" }, \ +{ kX86 ## opname ## 64TR, kThreadReg, mem_use | IS_BINARY_OP | REG_USE1 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_r32, 0, 0, 0, 0, 0 }, #opname "64TR", "fs:[!0d],!1r" }, \ +{ kX86 ## opname ## 64RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RR", "!0r,!1r" }, \ +{ kX86 ## opname ## 64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## 64RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE012 | SETS_CCODES | uses_ccodes, { REX_W, 0, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, \ +{ kX86 ## opname ## 64RT, kRegThread, IS_LOAD | IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, r32_rm32, 0, 0, 0, 0, 0 }, #opname "64RT", "!0r,fs:[!1d]" }, \ +{ kX86 ## opname ## 64RI, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, rm32_i32_modrm, ax32_i32, 4 }, #opname "64RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 64MI, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "64MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 64AI, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 64TI, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, 
rm32_i32, 0, 0, rm32_i32_modrm, 0, 4 }, #opname "64TI", "fs:[!0d],!1d" }, \ +{ kX86 ## opname ## 64RI8, kRegImm, IS_BINARY_OP | reg_def | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64RI8", "!0r,!1d" }, \ +{ kX86 ## opname ## 64MI8, kMemImm, mem_use | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64MI8", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 64AI8, kArrayImm, mem_use | IS_QUIN_OP | REG_USE01 | SETS_CCODES | uses_ccodes, { REX_W, 0, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64AI8", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 64TI8, kThreadImm, mem_use | IS_BINARY_OP | SETS_CCODES | uses_ccodes, { THREAD_PREFIX, REX_W, rm32_i8, 0, 0, rm32_i8_modrm, 0, 1 }, #opname "64TI8", "fs:[!0d],!1d" } ENCODING_MAP(Add, IS_LOAD | IS_STORE, REG_DEF0, 0, 0x00 /* RegMem8/Reg8 */, 0x01 /* RegMem32/Reg32 */, @@ -146,6 +155,13 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Imul32RMI8, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RMI8", "!0r,[!1r+!2d],!3d" }, { kX86Imul32RAI8, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { 0, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul32RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + { kX86Imul64RRI, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RRI", "!0r,!1r,!2d" }, + { kX86Imul64RMI, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RMI", "!0r,[!1r+!2d],!3d" }, + { kX86Imul64RAI, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x69, 0, 0, 0, 0, 8 }, "Imul64RAI", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + { kX86Imul64RRI8, kRegRegImm, IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RRI8", "!0r,!1r,!2d" }, + { kX86Imul64RMI8, kRegMemImm, IS_LOAD | IS_QUAD_OP | REG_DEF0_USE1 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RMI8", "!0r,[!1r+!2d],!3d" }, + { kX86Imul64RAI8, kRegArrayImm, IS_LOAD | IS_SEXTUPLE_OP | REG_DEF0_USE12 | SETS_CCODES, { REX_W, 0, 0x6B, 0, 0, 0, 0, 1 }, "Imul64RAI8", "!0r,[!1r+!2r<<!3d+!4d],!5d" }, + { kX86Mov8MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8MR", "[!0r+!1d],!2r" }, { kX86Mov8AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8AR", "[!0r+!1r<<!2d+!3d],!4r" }, { kX86Mov8TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0, 0x88, 0, 0, 0, 0, 0 }, "Mov8TR", "fs:[!0d],!1r" }, @@ -171,30 +187,42 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Mov16TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0x66, 0xC7, 0, 0, 0, 0, 2 }, "Mov16TI", "fs:[!0d],!1d" }, { kX86Mov32MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32MR", "[!0r+!1d],!2r" }, - { kX86Mov64MR, kMemReg64, IS_STORE | IS_TERTIARY_OP | REG_USE02, { REX_W, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov64MR", "[!0r+!1d],!2r" }, { kX86Mov32AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32AR", "[!0r+!1r<<!2d+!3d],!4r" }, - { kX86Mov64AR, kArrayReg64, IS_STORE | IS_QUIN_OP | REG_USE014, { REX_W, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" }, { kX86Mov32TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov32TR", "fs:[!0d],!1r" }, { kX86Mov32RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 
0, 0 }, "Mov32RR", "!0r,!1r" }, { kX86Mov32RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RM", "!0r,[!1r+!2d]" }, - { kX86Mov64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RM", "!0r,[!1r+!2d]" }, { kX86Mov32RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, - { kX86Mov64RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, { kX86Mov32RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov32RT", "!0r,fs:[!1d]" }, - { kX86Mov64RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RT", "!0r,fs:[!1d]" }, { kX86Mov32RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { 0, 0, 0xB8, 0, 0, 0, 0, 4 }, "Mov32RI", "!0r,!1d" }, { kX86Mov32MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { 0, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32MI", "[!0r+!1d],!2d" }, { kX86Mov32AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { 0, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32AI", "[!0r+!1r<<!2d+!3d],!4d" }, { kX86Mov32TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, 0, 0xC7, 0, 0, 0, 0, 4 }, "Mov32TI", "fs:[!0d],!1d" }, - { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 4 }, "Mov64TI", "fs:[!0d],!1d" }, - { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" }, + { kX86Lea32RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RM", "!0r,[!1r+!2d]" }, - { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86Lea32RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { 0, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea32RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + + { kX86Mov64MR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { REX_W, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov64MR", "[!0r+!1d],!2r" }, + { kX86Mov64AR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { REX_W, 0, 0x89, 0, 0, 0, 0, 0 }, "Mov64AR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86Mov64TR, kThreadReg, IS_STORE | IS_BINARY_OP | REG_USE1, { THREAD_PREFIX, REX_W, 0x89, 0, 0, 0, 0, 0 }, "Mov64TR", "fs:[!0d],!1r" }, + { kX86Mov64RR, kRegReg, IS_BINARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RR", "!0r,!1r" }, + { kX86Mov64RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0_USE1, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RM", "!0r,[!1r+!2d]" }, + { kX86Mov64RA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0_USE12, { REX_W, 0, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86Mov64RT, kRegThread, IS_LOAD | IS_BINARY_OP | REG_DEF0, { THREAD_PREFIX, REX_W, 0x8B, 0, 0, 0, 0, 0 }, "Mov64RT", "!0r,fs:[!1d]" }, + { kX86Mov64RI, kMovRegImm, IS_BINARY_OP | REG_DEF0, { REX_W, 0, 0xB8, 0, 0, 0, 0, 8 }, "Mov64RI", "!0r,!1d" }, + { kX86Mov64MI, kMemImm, IS_STORE | IS_TERTIARY_OP | REG_USE0, { REX_W, 0, 0xC7, 0, 0, 0, 0, 8 }, "Mov64MI", "[!0r+!1d],!2d" }, + { kX86Mov64AI, kArrayImm, IS_STORE | IS_QUIN_OP | REG_USE01, { REX_W, 0, 0xC7, 0, 0, 0, 0, 8 }, "Mov64AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Mov64TI, kThreadImm, IS_STORE | IS_BINARY_OP, { THREAD_PREFIX, REX_W, 0xC7, 0, 0, 0, 0, 8 }, "Mov64TI", "fs:[!0d],!1d" }, - { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RR", "!2c 
!0r,!1r" }, + { kX86Lea64RM, kRegMem, IS_TERTIARY_OP | IS_LOAD | REG_DEF0_USE1, { REX_W, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea64RM", "!0r,[!1r+!2d]" }, - { kX86Cmov32RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RM", "!3c !0r,[!1r+!2d]" }, + { kX86Lea64RA, kRegArray, IS_QUIN_OP | REG_DEF0_USE12, { REX_W, 0, 0x8D, 0, 0, 0, 0, 0 }, "Lea64RA", "!0r,[!1r+!2r<<!3d+!4d]" }, + + { kX86Cmov32RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RR", "!2c !0r,!1r" }, + { kX86Cmov64RRC, kRegRegCond, IS_TERTIARY_OP | REG_DEF0_USE01 | USES_CCODES, {REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc64RR", "!2c !0r,!1r" }, + + { kX86Cmov32RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {0, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc32RM", "!3c !0r,[!1r+!2d]" }, + { kX86Cmov64RMC, kRegMemCond, IS_QUAD_OP | IS_LOAD | REG_DEF0_USE01 | USES_CCODES, {REX_W, 0, 0x0F, 0x40, 0, 0, 0, 0}, "Cmovcc64RM", "!3c !0r,[!1r+!2d]" }, #define SHIFT_ENCODING_MAP(opname, modrm_opcode) \ { kX86 ## opname ## 8RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { 0, 0, 0xC0, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "8RI", "!0r,!1d" }, \ @@ -216,7 +244,14 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86 ## opname ## 32AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "32AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ { kX86 ## opname ## 32RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32RC", "!0r,cl" }, \ { kX86 ## opname ## 32MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32MC", "[!0r+!1d],cl" }, \ -{ kX86 ## opname ## 32AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32AC", "[!0r+!1r<<!2d+!3d],cl" } +{ kX86 ## opname ## 32AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { 0, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "32AC", "[!0r+!1r<<!2d+!3d],cl" }, \ + \ +{ kX86 ## opname ## 64RI, kShiftRegImm, IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES, { REX_W, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64RI", "!0r,!1d" }, \ +{ kX86 ## opname ## 64MI, kShiftMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { REX_W, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64MI", "[!0r+!1d],!2d" }, \ +{ kX86 ## opname ## 64AI, kShiftArrayImm, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xC1, 0, 0, modrm_opcode, 0xD1, 1 }, #opname "64AI", "[!0r+!1r<<!2d+!3d],!4d" }, \ +{ kX86 ## opname ## 64RC, kShiftRegCl, IS_BINARY_OP | REG_DEF0_USE0 | REG_USEC | SETS_CCODES, { REX_W, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "64RC", "!0r,cl" }, \ +{ kX86 ## opname ## 64MC, kShiftMemCl, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | REG_USEC | SETS_CCODES, { REX_W, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "64MC", "[!0r+!1d],cl" }, \ +{ kX86 ## opname ## 64AC, kShiftArrayCl, IS_LOAD | IS_STORE | IS_QUIN_OP | REG_USE01 | REG_USEC | SETS_CCODES, { REX_W, 0, 0xD3, 0, 0, modrm_opcode, 0, 0 }, #opname "64AC", "[!0r+!1r<<!2d+!3d],cl" } SHIFT_ENCODING_MAP(Rol, 0x0), SHIFT_ENCODING_MAP(Ror, 0x1), @@ -232,6 +267,10 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Shld32MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | 
SETS_CCODES, { 0, 0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld32MRI", "[!0r+!1d],!2r,!3d" }, { kX86Shrd32RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { 0, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32RRI", "!0r,!1r,!2d" }, { kX86Shrd32MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { 0, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd32MRI", "[!0r+!1d],!2r,!3d" }, + { kX86Shld64RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { REX_W, 0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld64RRI", "!0r,!1r,!2d" }, + { kX86Shld64MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W, 0, 0x0F, 0xA4, 0, 0, 0, 1}, "Shld64MRI", "[!0r+!1d],!2r,!3d" }, + { kX86Shrd64RRI, kRegRegImmRev, IS_TERTIARY_OP | REG_DEF0_USE01 | SETS_CCODES, { REX_W, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd64RRI", "!0r,!1r,!2d" }, + { kX86Shrd64MRI, kMemRegImm, IS_QUAD_OP | REG_USE02 | IS_LOAD | IS_STORE | SETS_CCODES, { REX_W, 0, 0x0F, 0xAC, 0, 0, 0, 1}, "Shrd64MRI", "[!0r+!1d],!2r,!3d" }, { kX86Test8RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8RI", "!0r,!1d" }, { kX86Test8MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF6, 0, 0, 0, 0, 1}, "Test8MI", "[!0r+!1d],!2d" }, @@ -242,7 +281,12 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86Test32RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4}, "Test32RI", "!0r,!1d" }, { kX86Test32MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4}, "Test32MI", "[!0r+!1d],!2d" }, { kX86Test32AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0xF7, 0, 0, 0, 0, 4}, "Test32AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Test64RI, kRegImm, IS_BINARY_OP | REG_USE0 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64RI", "!0r,!1d" }, + { kX86Test64MI, kMemImm, IS_LOAD | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64MI", "[!0r+!1d],!2d" }, + { kX86Test64AI, kArrayImm, IS_LOAD | IS_QUIN_OP | REG_USE01 | SETS_CCODES, { REX_W, 0, 0xF7, 0, 0, 0, 0, 8}, "Test64AI", "[!0r+!1r<<!2d+!3d],!4d" }, + { kX86Test32RR, kRegReg, IS_BINARY_OP | REG_USE01 | SETS_CCODES, { 0, 0, 0x85, 0, 0, 0, 0, 0}, "Test32RR", "!0r,!1r" }, + { kX86Test64RR, kRegReg, IS_BINARY_OP | REG_USE01 | SETS_CCODES, { REX_W, 0, 0x85, 0, 0, 0, 0, 0}, "Test64RR", "!0r,!1r" }, #define UNARY_ENCODING_MAP(opname, modrm, is_store, sets_ccodes, \ reg, reg_kind, reg_flags, \ @@ -258,7 +302,10 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86 ## opname ## 16 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | hw_flags | sets_ccodes, { 0x66, 0, 0xF7, 0, 0, modrm, 0, imm << 1}, #opname "16" #arr, hw_format "[!0r+!1r<<!2d+!3d]" }, \ { kX86 ## opname ## 32 ## reg, reg_kind, reg_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #reg, w_format "!0r" }, \ { kX86 ## opname ## 32 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #mem, w_format "[!0r+!1d]" }, \ -{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #arr, w_format "[!0r+!1r<<!2d+!3d]" } +{ kX86 ## opname ## 32 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags | sets_ccodes, { 0, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "32" #arr, w_format "[!0r+!1r<<!2d+!3d]" }, \ +{ kX86 ## opname ## 64 ## reg, reg_kind, reg_flags | w_flags | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 
0, imm << 2}, #opname "64" #reg, w_format "!0r" }, \ +{ kX86 ## opname ## 64 ## mem, mem_kind, IS_LOAD | is_store | mem_flags | w_flags | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "64" #mem, w_format "[!0r+!1d]" }, \ +{ kX86 ## opname ## 64 ## arr, arr_kind, IS_LOAD | is_store | arr_flags | w_flags | sets_ccodes, { REX_W, 0, 0xF7, 0, 0, modrm, 0, imm << 2}, #opname "64" #arr, w_format "[!0r+!1r<<!2d+!3d]" } UNARY_ENCODING_MAP(Not, 0x2, IS_STORE, 0, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""), UNARY_ENCODING_MAP(Neg, 0x3, IS_STORE, SETS_CCODES, R, kReg, IS_UNARY_OP | REG_DEF0_USE0, M, kMem, IS_BINARY_OP | REG_USE0, A, kArray, IS_QUAD_OP | REG_USE01, 0, 0, 0, 0, "", "", ""), @@ -431,7 +478,8 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86RepneScasw, kPrefix2Nullary, NO_OPERAND | REG_USEA | REG_USEC | SETS_CCODES, { 0x66, 0xF2, 0xAF, 0, 0, 0, 0, 0 }, "RepNE ScasW", "" }, }; -size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib) { +size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displacement, + int reg_r, int reg_x, bool has_sib) { size_t size = 0; if (entry->skeleton.prefix1 > 0) { ++size; @@ -439,6 +487,10 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displa ++size; } } + if ((NeedsRex(base) || NeedsRex(reg_r) || NeedsRex(reg_x)) && + entry->skeleton.prefix1 != REX_W && entry->skeleton.prefix2 != REX_W) { + ++size; // REX_R + } ++size; // opcode if (entry->skeleton.opcode == 0x0F) { ++size; @@ -447,13 +499,13 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displa } } ++size; // modrm - if (has_sib || RegStorage::RegNum(base) == rs_rX86_SP.GetRegNum() + if (has_sib || LowRegisterBits(RegStorage::RegNum(base)) == rs_rX86_SP.GetRegNum() || (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX)) { // SP requires a SIB byte. // GS access also needs a SIB byte for absolute adressing in 64-bit mode. ++size; } - if (displacement != 0 || RegStorage::RegNum(base) == rs_rBP.GetRegNum()) { + if (displacement != 0 || LowRegisterBits(RegStorage::RegNum(base)) == rs_rBP.GetRegNum()) { // BP requires an explicit displacement, even when it's 0. 
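Note on the size accounting in ComputeSize above: the extra byte counted when NeedsRex() fires is the REX prefix that EmitPrefix builds further down in this file. A minimal standalone sketch of that byte's layout, using plain 0-15 register numbers instead of RegStorage (the helper name and signature are illustrative only, not part of the patch):

    #include <cstdint>

    // REX is 0100WRXB. W selects 64-bit operand size; R, X and B extend
    // ModRM.reg, SIB.index and ModRM.rm/SIB.base so that r8-r15 become
    // reachable. The low three bits of each register still go into ModRM/SIB.
    uint8_t BuildRex(bool w, int reg_r, int reg_x, int reg_b) {
      uint8_t rex = 0;
      if (w)         rex |= 0x48;  // REX.W
      if (reg_r > 7) rex |= 0x44;  // REX.R
      if (reg_x > 7) rex |= 0x42;  // REX.X
      if (reg_b > 7) rex |= 0x41;  // REX.B
      return rex;                  // 0 means no REX byte needs to be emitted
    }
    // Example: mov r9, [r10] -> BuildRex(true, 9, 0, 10) == 0x4D, followed by
    // opcode 0x8B and a ModRM byte built from (9 & 7) and (10 & 7).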
if (entry->opcode != kX86Lea32RA) { DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0ULL) << entry->name; @@ -477,38 +529,41 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { case kPrefix2Nullary: return 3; // 1 byte of opcode + 2 prefixes case kRegOpcode: // lir operands - 0: reg - return ComputeSize(entry, 0, 0, false) - 1; // substract 1 for modrm - case kReg64: + // substract 1 for modrm + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) - 1; case kReg: // lir operands - 0: reg - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); case kMem: // lir operands - 0: base, 1: disp - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); case kArray: // lir operands - 0: base, 1: index, 2: scale, 3: disp - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); - case kMemReg64: + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true); case kMemReg: // lir operands - 0: base, 1: disp, 2: reg - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], + lir->operands[2], NO_REG, false); case kMemRegImm: // lir operands - 0: base, 1: disp, 2: reg 3: immediate - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); - case kArrayReg64: + return ComputeSize(entry, lir->operands[0], lir->operands[1], + lir->operands[2], NO_REG, false); case kArrayReg: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + lir->operands[4], lir->operands[1], true); case kThreadReg: // lir operands - 0: disp, 1: reg - return ComputeSize(entry, 0, lir->operands[0], false); - case kRegReg: - return ComputeSize(entry, 0, 0, false); - case kRegRegStore: - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, lir->operands[0], lir->operands[1], NO_REG, false); + case kRegReg: // lir operands - 0: reg1, 1: reg2 + return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false); + case kRegRegStore: // lir operands - 0: reg2, 1: reg1 + return ComputeSize(entry, 0, 0, lir->operands[1], lir->operands[0], false); case kRegMem: // lir operands - 0: reg, 1: base, 2: disp - return ComputeSize(entry, lir->operands[1], lir->operands[2], false); + return ComputeSize(entry, lir->operands[1], lir->operands[2], + lir->operands[0], NO_REG, false); case kRegArray: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp - return ComputeSize(entry, lir->operands[1], lir->operands[4], true); - case kReg64Thread: // lir operands - 0: reg, 1: disp + return ComputeSize(entry, lir->operands[1], lir->operands[4], + lir->operands[0], lir->operands[2], true); case kRegThread: // lir operands - 0: reg, 1: disp - return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit - case kReg64Imm: + // displacement size is always 32bit + return ComputeSize(entry, 0, 0x12345678, lir->operands[0], NO_REG, false); case kRegImm: { // lir operands - 0: reg, 1: immediate - size_t size = ComputeSize(entry, 0, 0, false); + size_t size = ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); if (entry->skeleton.ax_opcode == 0) { return size; } else { @@ -518,47 +573,58 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { } } case kMemImm: // lir operands - 0: 
base, 1: disp, 2: immediate - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], + NO_REG, lir->operands[0], false); case kArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true); case kThreadImm: // lir operands - 0: disp, 1: imm - return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit + // displacement size is always 32bit + return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false); case kRegRegImm: // lir operands - 0: reg, 1: reg, 2: imm case kRegRegImmRev: - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false); case kRegMemImm: // lir operands - 0: reg, 1: base, 2: disp, 3: imm - return ComputeSize(entry, lir->operands[1], lir->operands[2], false); + return ComputeSize(entry, lir->operands[1], lir->operands[2], + lir->operands[0], NO_REG, false); case kRegArrayImm: // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp, 5: imm - return ComputeSize(entry, lir->operands[1], lir->operands[4], true); + return ComputeSize(entry, lir->operands[1], lir->operands[4], + lir->operands[0], lir->operands[2], true); case kMovRegImm: // lir operands - 0: reg, 1: immediate - return 1 + entry->skeleton.immediate_bytes; + return (entry->skeleton.prefix1 != 0 || NeedsRex(lir->operands[0])?1:0) + + 1 + entry->skeleton.immediate_bytes; case kShiftRegImm: // lir operands - 0: reg, 1: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, 0, 0, false) - (lir->operands[1] == 1 ? 1 : 0); + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false) - + (lir->operands[1] == 1 ? 1 : 0); case kShiftMemImm: // lir operands - 0: base, 1: disp, 2: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, lir->operands[0], lir->operands[1], false) - + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false) - (lir->operands[2] == 1 ? 1 : 0); case kShiftArrayImm: // lir operands - 0: base, 1: index, 2: scale, 3: disp 4: immediate // Shift by immediate one has a shorter opcode. - return ComputeSize(entry, lir->operands[0], lir->operands[3], true) - + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true) - (lir->operands[4] == 1 ? 
1 : 0); - case kShiftRegCl: - return ComputeSize(entry, 0, 0, false); + case kShiftRegCl: // lir operands - 0: reg, 1: cl + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); case kShiftMemCl: // lir operands - 0: base, 1: disp, 2: cl - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); case kShiftArrayCl: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + lir->operands[4], lir->operands[1], true); case kRegCond: // lir operands - 0: reg, 1: cond - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, 0, lir->operands[0], NO_REG, false); case kMemCond: // lir operands - 0: base, 1: disp, 2: cond - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); case kArrayCond: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: cond - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true); case kRegRegCond: // lir operands - 0: reg, 1: reg, 2: cond - return ComputeSize(entry, 0, 0, false); + return ComputeSize(entry, 0, 0, lir->operands[0], lir->operands[1], false); case kRegMemCond: // lir operands - 0: reg, 1: reg, 2: disp, 3:cond - return ComputeSize(entry, lir->operands[1], lir->operands[2], false); + return ComputeSize(entry, lir->operands[1], lir->operands[2], + lir->operands[0], lir->operands[1], false); case kJcc: if (lir->opcode == kX86Jcc8) { return 2; // opcode + rel8 @@ -572,21 +638,28 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { } else if (lir->opcode == kX86Jmp32) { return 5; // opcode + rel32 } else if (lir->opcode == kX86JmpT) { - return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit + // displacement size is always 32bit + return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false); } else { DCHECK(lir->opcode == kX86JmpR); - return 2; // opcode + modrm + if (NeedsRex(lir->operands[0])) { + return 3; // REX.B + opcode + modrm + } else { + return 2; // opcode + modrm + } } case kCall: switch (lir->opcode) { case kX86CallI: return 5; // opcode 0:disp case kX86CallR: return 2; // opcode modrm case kX86CallM: // lir operands - 0: base, 1: disp - return ComputeSize(entry, lir->operands[0], lir->operands[1], false); + return ComputeSize(entry, lir->operands[0], lir->operands[1], NO_REG, NO_REG, false); case kX86CallA: // lir operands - 0: base, 1: index, 2: scale, 3: disp - return ComputeSize(entry, lir->operands[0], lir->operands[3], true); + return ComputeSize(entry, lir->operands[0], lir->operands[3], + NO_REG, lir->operands[1], true); case kX86CallT: // lir operands - 0: disp - return ComputeSize(entry, 0, 0x12345678, false); // displacement size is always 32bit + // displacement size is always 32bit + return ComputeSize(entry, 0, 0x12345678, NO_REG, NO_REG, false); default: break; } @@ -594,16 +667,19 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { case kPcRel: if (entry->opcode == kX86PcRelLoadRA) { // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: table - return ComputeSize(entry, lir->operands[1], 0x12345678, true); + return ComputeSize(entry, lir->operands[1], 0x12345678, + lir->operands[0], lir->operands[2], true); } else { 
DCHECK(entry->opcode == kX86PcRelAdr); return 5; // opcode with reg + 4 byte immediate } - case kMacro: + case kMacro: // lir operands - 0: reg DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod)); return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ + - ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0, false) - - (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); // shorter ax encoding + ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0, + lir->operands[0], NO_REG, false) - + // shorter ax encoding + (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); default: break; } @@ -612,19 +688,62 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { } void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry) { + EmitPrefix(entry, NO_REG, NO_REG, NO_REG); +} + +void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry, + uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) { + // REX.WRXB + // W - 64-bit operand + // R - MODRM.reg + // X - SIB.index + // B - MODRM.rm/SIB.base + bool force = false; + bool w = (entry->skeleton.prefix1 == REX_W) || (entry->skeleton.prefix2 == REX_W); + bool r = NeedsRex(reg_r); + bool x = NeedsRex(reg_x); + bool b = NeedsRex(reg_b); + uint8_t rex = force ? 0x40 : 0; + if (w) { + rex |= 0x48; // REX.W000 + } + if (r) { + rex |= 0x44; // REX.0R00 + } + if (x) { + rex |= 0x42; // REX.00X0 + } + if (b) { + rex |= 0x41; // REX.000B + } if (entry->skeleton.prefix1 != 0) { if (Gen64Bit() && entry->skeleton.prefix1 == THREAD_PREFIX) { // 64 bit adresses by GS, not FS code_buffer_.push_back(THREAD_PREFIX_GS); } else { - code_buffer_.push_back(entry->skeleton.prefix1); + if (entry->skeleton.prefix1 == REX_W) { + rex |= entry->skeleton.prefix1; + code_buffer_.push_back(rex); + rex = 0; + } else { + code_buffer_.push_back(entry->skeleton.prefix1); + } } if (entry->skeleton.prefix2 != 0) { - code_buffer_.push_back(entry->skeleton.prefix2); + if (entry->skeleton.prefix2 == REX_W) { + rex |= entry->skeleton.prefix2; + code_buffer_.push_back(rex); + rex = 0; + } else { + code_buffer_.push_back(entry->skeleton.prefix2); + } } } else { DCHECK_EQ(0, entry->skeleton.prefix2); } + if (rex != 0) { + code_buffer_.push_back(rex); + } } void X86Mir2Lir::EmitOpcode(const X86EncodingMap* entry) { @@ -643,7 +762,12 @@ void X86Mir2Lir::EmitOpcode(const X86EncodingMap* entry) { } void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry) { - EmitPrefix(entry); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG); +} + +void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry, + uint8_t reg_r, uint8_t reg_x, uint8_t reg_b) { + EmitPrefix(entry, reg_r, reg_x, reg_b); EmitOpcode(entry); } @@ -712,7 +836,7 @@ void X86Mir2Lir::EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t i EmitDisp(base, disp); } -void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int imm) { +void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int64_t imm) { switch (entry->skeleton.immediate_bytes) { case 1: DCHECK(IS_SIMM8(imm)); @@ -724,11 +848,26 @@ void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int imm) { code_buffer_.push_back((imm >> 8) & 0xFF); break; case 4: + if (imm <0) { + CHECK_EQ((-imm) & 0x0FFFFFFFFl, -imm); + } else { + CHECK_EQ(imm & 0x0FFFFFFFFl, imm); + } code_buffer_.push_back(imm & 0xFF); code_buffer_.push_back((imm >> 8) & 0xFF); code_buffer_.push_back((imm >> 16) & 0xFF); code_buffer_.push_back((imm >> 24) & 0xFF); break; + case 8: + code_buffer_.push_back(imm & 0xFF); + code_buffer_.push_back((imm >> 8) & 0xFF); + 
code_buffer_.push_back((imm >> 16) & 0xFF); + code_buffer_.push_back((imm >> 24) & 0xFF); + code_buffer_.push_back((imm >> 32) & 0xFF); + code_buffer_.push_back((imm >> 40) & 0xFF); + code_buffer_.push_back((imm >> 48) & 0xFF); + code_buffer_.push_back((imm >> 56) & 0xFF); + break; default: LOG(FATAL) << "Unexpected immediate bytes (" << entry->skeleton.immediate_bytes << ") for instruction: " << entry->name; @@ -737,7 +876,8 @@ void X86Mir2Lir::EmitImm(const X86EncodingMap* entry, int imm) { } void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); // There's no 3-byte instruction with +rd DCHECK(entry->skeleton.opcode != 0x0F || (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A)); @@ -749,7 +889,8 @@ void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg) { } void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); if (RegStorage::RegNum(reg) >= 4) { DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(RegStorage::RegNum(reg)) @@ -763,7 +904,8 @@ void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, uint8_t reg) { } void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp) { - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -775,15 +917,29 @@ void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp) void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, NO_REG, index, base); + index = LowRegisterBits(index); + base = LowRegisterBits(base); EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } +uint8_t X86Mir2Lir::LowRegisterBits(uint8_t reg) { + uint8_t res = reg; + res = reg & kRegNumMask32; // 3 bits + return res; +} + +bool X86Mir2Lir::NeedsRex(uint8_t reg) { + return RegStorage::RegNum(reg) > 7; +} + void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, base); + reg = LowRegisterBits(reg); + base = LowRegisterBits(base); if (RegStorage::RegNum(reg) >= 4) { DCHECK(strchr(entry->name, '8') == NULL || entry->opcode == kX86Movzx8RM || entry->opcode == kX86Movsx8RM) @@ -802,9 +958,12 @@ void X86Mir2Lir::EmitRegMem(const X86EncodingMap* entry, EmitMemReg(entry, base, disp, reg); } -void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, uint8_t index, - int scale, int disp) { - EmitPrefixAndOpcode(entry); +void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, uint8_t reg, uint8_t base, + uint8_t index, int scale, int disp) { + EmitPrefixAndOpcode(entry, reg, index, base); + reg = LowRegisterBits(reg); + index = LowRegisterBits(index); + base = LowRegisterBits(base); EmitModrmSibDisp(reg, base, index, scale, disp); DCHECK_EQ(0, entry->skeleton.modrm_opcode); DCHECK_EQ(0, entry->skeleton.ax_opcode); @@ -819,7 +978,9 @@ void X86Mir2Lir::EmitArrayReg(const X86EncodingMap* 
entry, uint8_t base, uint8_t void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t index, int scale, int disp, int32_t imm) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, NO_REG, index, base); + index = LowRegisterBits(index); + base = LowRegisterBits(base); EmitModrmSibDisp(entry->skeleton.modrm_opcode, base, index, scale, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); EmitImm(entry, imm); @@ -827,7 +988,8 @@ void X86Mir2Lir::EmitArrayImm(const X86EncodingMap* entry, uint8_t base, uint8_t void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int disp) { DCHECK_NE(entry->skeleton.prefix1, 0); - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); if (RegStorage::RegNum(reg) >= 4) { DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << static_cast<int>(RegStorage::RegNum(reg)) @@ -845,7 +1007,9 @@ void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, uint8_t reg, int dis } void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2); + reg1 = LowRegisterBits(reg1); + reg2 = LowRegisterBits(reg2); DCHECK_LT(RegStorage::RegNum(reg1), 8); DCHECK_LT(RegStorage::RegNum(reg2), 8); uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2); @@ -857,7 +1021,9 @@ void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, uint8_t reg1, uint8_t r void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, int32_t imm) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2); + reg1 = LowRegisterBits(reg1); + reg2 = LowRegisterBits(reg2); DCHECK_LT(RegStorage::RegNum(reg1), 8); DCHECK_LT(RegStorage::RegNum(reg2), 8); uint8_t modrm = (3 << 6) | (RegStorage::RegNum(reg1) << 3) | RegStorage::RegNum(reg2); @@ -874,7 +1040,9 @@ void X86Mir2Lir::EmitRegRegImmRev(const X86EncodingMap* entry, void X86Mir2Lir::EmitRegMemImm(const X86EncodingMap* entry, uint8_t reg, uint8_t base, int disp, int32_t imm) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg, NO_REG, base); + reg = LowRegisterBits(reg); + base = LowRegisterBits(base); DCHECK(!RegStorage::IsFloat(reg)); DCHECK_LT(RegStorage::RegNum(reg), 8); EmitModrmDisp(reg, base, disp); @@ -889,10 +1057,11 @@ void X86Mir2Lir::EmitMemRegImm(const X86EncodingMap* entry, } void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, reg); if (RegStorage::RegNum(reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) { code_buffer_.push_back(entry->skeleton.ax_opcode); } else { + reg = LowRegisterBits(reg); EmitOpcode(entry); uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg); code_buffer_.push_back(modrm); @@ -901,7 +1070,8 @@ void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { } void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int32_t imm) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); EmitImm(entry, imm); @@ -918,17 +1088,37 @@ void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int disp, int imm) { DCHECK_EQ(entry->skeleton.ax_opcode, 0); } -void X86Mir2Lir::EmitMovRegImm(const 
X86EncodingMap* entry, uint8_t reg, int imm) { +void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t imm) { + EmitPrefix(entry, NO_REG, NO_REG, reg); + reg = LowRegisterBits(reg); DCHECK_LT(RegStorage::RegNum(reg), 8); code_buffer_.push_back(0xB8 + RegStorage::RegNum(reg)); - code_buffer_.push_back(imm & 0xFF); - code_buffer_.push_back((imm >> 8) & 0xFF); - code_buffer_.push_back((imm >> 16) & 0xFF); - code_buffer_.push_back((imm >> 24) & 0xFF); + switch (entry->skeleton.immediate_bytes) { + case 4: + code_buffer_.push_back(imm & 0xFF); + code_buffer_.push_back((imm >> 8) & 0xFF); + code_buffer_.push_back((imm >> 16) & 0xFF); + code_buffer_.push_back((imm >> 24) & 0xFF); + break; + case 8: + code_buffer_.push_back(imm & 0xFF); + code_buffer_.push_back((imm >> 8) & 0xFF); + code_buffer_.push_back((imm >> 16) & 0xFF); + code_buffer_.push_back((imm >> 24) & 0xFF); + code_buffer_.push_back((imm >> 32) & 0xFF); + code_buffer_.push_back((imm >> 40) & 0xFF); + code_buffer_.push_back((imm >> 48) & 0xFF); + code_buffer_.push_back((imm >> 56) & 0xFF); + break; + default: + LOG(FATAL) << "Unsupported immediate size for EmitMovRegImm: " + << static_cast<uint32_t>(entry->skeleton.immediate_bytes); + } } void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm) { - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, reg); + reg = LowRegisterBits(reg); if (imm != 1) { code_buffer_.push_back(entry->skeleton.opcode); } else { @@ -955,7 +1145,8 @@ void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int i void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_t cl) { DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg())); - EmitPrefix(entry); + EmitPrefix(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -970,7 +1161,8 @@ void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, uint8_t reg, uint8_ void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t cl) { DCHECK_EQ(cl, static_cast<uint8_t>(rs_rCX.GetReg())); - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -983,7 +1175,8 @@ void X86Mir2Lir::EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, int displacement, int imm) { - EmitPrefix(entry); + EmitPrefix(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); if (imm != 1) { code_buffer_.push_back(entry->skeleton.opcode); } else { @@ -1002,7 +1195,8 @@ void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, } void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t condition) { - EmitPrefix(entry); + EmitPrefix(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0x0F, entry->skeleton.opcode); code_buffer_.push_back(0x0F); @@ -1015,7 +1209,8 @@ void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, uint8_t reg, uint8_t c DCHECK_EQ(entry->skeleton.immediate_bytes, 0); } -void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t condition) { +void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t 
base, int displacement, + uint8_t condition) { if (entry->skeleton.prefix1 != 0) { code_buffer_.push_back(entry->skeleton.prefix1); if (entry->skeleton.prefix2 != 0) { @@ -1037,7 +1232,9 @@ void X86Mir2Lir::EmitMemCond(const X86EncodingMap* entry, uint8_t base, int disp void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t reg2, uint8_t condition) { // Generate prefix and opcode without the condition - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg1, NO_REG, reg2); + reg1 = LowRegisterBits(reg1); + reg2 = LowRegisterBits(reg2); // Now add the condition. The last byte of opcode is the one that receives it. DCHECK_LE(condition, 0xF); @@ -1059,9 +1256,12 @@ void X86Mir2Lir::EmitRegRegCond(const X86EncodingMap* entry, uint8_t reg1, uint8 code_buffer_.push_back(modrm); } -void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, int displacement, uint8_t condition) { +void X86Mir2Lir::EmitRegMemCond(const X86EncodingMap* entry, uint8_t reg1, uint8_t base, + int displacement, uint8_t condition) { // Generate prefix and opcode without the condition - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, reg1, NO_REG, base); + reg1 = LowRegisterBits(reg1); + base = LowRegisterBits(base); // Now add the condition. The last byte of opcode is the one that receives it. DCHECK_LE(condition, 0xF); @@ -1094,8 +1294,10 @@ void X86Mir2Lir::EmitJmp(const X86EncodingMap* entry, int rel) { code_buffer_.push_back(rel & 0xFF); } else { DCHECK(entry->opcode == kX86JmpR); - code_buffer_.push_back(entry->skeleton.opcode); uint8_t reg = static_cast<uint8_t>(rel); + EmitPrefix(entry, NO_REG, NO_REG, reg); + code_buffer_.push_back(entry->skeleton.opcode); + reg = LowRegisterBits(reg); DCHECK_LT(RegStorage::RegNum(reg), 8); uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | RegStorage::RegNum(reg); code_buffer_.push_back(modrm); @@ -1120,7 +1322,8 @@ void X86Mir2Lir::EmitJcc(const X86EncodingMap* entry, int rel, uint8_t cc) { } void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, uint8_t base, int disp) { - EmitPrefixAndOpcode(entry); + EmitPrefixAndOpcode(entry, NO_REG, NO_REG, base); + base = LowRegisterBits(base); EmitModrmDisp(entry->skeleton.modrm_opcode, base, disp); DCHECK_EQ(0, entry->skeleton.ax_opcode); DCHECK_EQ(0, entry->skeleton.immediate_bytes); @@ -1161,9 +1364,12 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(base_or_table)); disp = tab_rec->offset; } - EmitPrefix(entry); - DCHECK_LT(RegStorage::RegNum(reg), 8); if (entry->opcode == kX86PcRelLoadRA) { + EmitPrefix(entry, reg, index, base_or_table); + reg = LowRegisterBits(reg); + base_or_table = LowRegisterBits(base_or_table); + index = LowRegisterBits(index); + DCHECK_LT(RegStorage::RegNum(reg), 8); code_buffer_.push_back(entry->skeleton.opcode); DCHECK_NE(0x0F, entry->skeleton.opcode); DCHECK_EQ(0, entry->skeleton.extra_opcode1); @@ -1178,6 +1384,7 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, code_buffer_.push_back(sib); DCHECK_EQ(0, entry->skeleton.immediate_bytes); } else { + DCHECK_LT(RegStorage::RegNum(reg), 8); code_buffer_.push_back(entry->skeleton.opcode + RegStorage::RegNum(reg)); } code_buffer_.push_back(disp & 0xFF); @@ -1190,6 +1397,8 @@ void X86Mir2Lir::EmitPcRel(const X86EncodingMap* entry, uint8_t reg, void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset) { DCHECK(entry->opcode == kX86StartOfMethod) << 
entry->name; + EmitPrefix(entry, reg, NO_REG, NO_REG); + reg = LowRegisterBits(reg); code_buffer_.push_back(0xE8); // call +0 code_buffer_.push_back(0); code_buffer_.push_back(0); @@ -1380,7 +1589,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { case kRegOpcode: // lir operands - 0: reg EmitOpRegOpcode(entry, lir->operands[0]); break; - case kReg64: case kReg: // lir operands - 0: reg EmitOpReg(entry, lir->operands[0]); break; @@ -1390,7 +1598,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { case kArray: // lir operands - 0: base, 1: index, 2: scale, 3: disp EmitOpArray(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]); break; - case kMemReg64: case kMemReg: // lir operands - 0: base, 1: disp, 2: reg EmitMemReg(entry, lir->operands[0], lir->operands[1], lir->operands[2]); break; @@ -1401,7 +1608,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitArrayImm(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3], lir->operands[4]); break; - case kArrayReg64: case kArrayReg: // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg EmitArrayReg(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3], lir->operands[4]); @@ -1413,7 +1619,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitRegArray(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3], lir->operands[4]); break; - case kReg64Thread: // lir operands - 0: reg, 1: disp case kRegThread: // lir operands - 0: reg, 1: disp EmitRegThread(entry, lir->operands[0], lir->operands[1]); break; @@ -1437,7 +1642,6 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitRegMemImm(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]); break; - case kReg64Imm: case kRegImm: // lir operands - 0: reg, 1: immediate EmitRegImm(entry, lir->operands[0], lir->operands[1]); break; @@ -1469,7 +1673,8 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitRegRegCond(entry, lir->operands[0], lir->operands[1], lir->operands[2]); break; case kRegMemCond: // lir operands - 0: reg, 1: reg, displacement, 3: condition - EmitRegMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3]); + EmitRegMemCond(entry, lir->operands[0], lir->operands[1], lir->operands[2], + lir->operands[3]); break; case kJmp: // lir operands - 0: rel if (entry->opcode == kX86JmpT) { @@ -1503,7 +1708,7 @@ AssemblerStatus X86Mir2Lir::AssembleInstructions(CodeOffset start_addr) { EmitPcRel(entry, lir->operands[0], lir->operands[1], lir->operands[2], lir->operands[3], lir->operands[4]); break; - case kMacro: + case kMacro: // lir operands - 0: reg EmitMacro(entry, lir->operands[0], lir->offset); break; default: diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index 4673cc0f7e..f363eb3a63 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -169,7 +169,7 @@ void X86Mir2Lir::GenMoveException(RegLocation rl_dest) { int ex_offset = Is64BitInstructionSet(cu_->instruction_set) ? 
Thread::ExceptionOffset<8>().Int32Value() : Thread::ExceptionOffset<4>().Int32Value(); - RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegLocation rl_result = EvalLoc(rl_dest, kRefReg, true); NewLIR2(kX86Mov32RT, rl_result.reg.GetReg(), ex_offset); NewLIR2(kX86Mov32TI, ex_offset, 0); StoreValue(rl_dest, rl_result); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 1807d5c13e..648c148c15 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -59,6 +59,7 @@ class X86Mir2Lir : public Mir2Lir { RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); + RegLocation LocCReturnRef(); RegLocation LocCReturnDouble(); RegLocation LocCReturnFloat(); RegLocation LocCReturnWide(); @@ -331,15 +332,21 @@ class X86Mir2Lir : public Mir2Lir { std::vector<uint8_t>* ReturnCallFrameInformation(); protected: - size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, bool has_sib); + size_t ComputeSize(const X86EncodingMap* entry, int base, int displacement, + int reg_r, int reg_x, bool has_sib); + uint8_t LowRegisterBits(uint8_t reg); + bool NeedsRex(uint8_t reg); void EmitPrefix(const X86EncodingMap* entry); + void EmitPrefix(const X86EncodingMap* entry, uint8_t reg_r, uint8_t reg_x, uint8_t reg_b); void EmitOpcode(const X86EncodingMap* entry); void EmitPrefixAndOpcode(const X86EncodingMap* entry); + void EmitPrefixAndOpcode(const X86EncodingMap* entry, + uint8_t reg_r, uint8_t reg_x, uint8_t reg_b); void EmitDisp(uint8_t base, int disp); void EmitModrmThread(uint8_t reg_or_opcode); void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int disp); void EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index, int scale, int disp); - void EmitImm(const X86EncodingMap* entry, int imm); + void EmitImm(const X86EncodingMap* entry, int64_t imm); void EmitOpRegOpcode(const X86EncodingMap* entry, uint8_t reg); void EmitOpReg(const X86EncodingMap* entry, uint8_t reg); void EmitOpMem(const X86EncodingMap* entry, uint8_t base, int disp); @@ -362,7 +369,7 @@ class X86Mir2Lir : public Mir2Lir { void EmitMemRegImm(const X86EncodingMap* entry, uint8_t base, int disp, uint8_t reg1, int32_t imm); void EmitRegImm(const X86EncodingMap* entry, uint8_t reg, int imm); void EmitThreadImm(const X86EncodingMap* entry, int disp, int imm); - void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int imm); + void EmitMovRegImm(const X86EncodingMap* entry, uint8_t reg, int64_t imm); void EmitShiftRegImm(const X86EncodingMap* entry, uint8_t reg, int imm); void EmitShiftMemImm(const X86EncodingMap* entry, uint8_t base, int disp, int imm); void EmitShiftMemCl(const X86EncodingMap* entry, uint8_t base, int displacement, uint8_t cl); diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index aec39ab529..0421a5967a 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -56,7 +56,7 @@ void X86Mir2Lir::GenArithOpFloat(Instruction::Code opcode, CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmodf), rl_src1, rl_src2, false); } - rl_result = GetReturn(true); + rl_result = GetReturn(kFPReg); StoreValue(rl_dest, rl_result); return; case Instruction::NEG_FLOAT: @@ -118,7 +118,7 @@ void X86Mir2Lir::GenArithOpDouble(Instruction::Code opcode, CallRuntimeHelperRegLocationRegLocation(QUICK_ENTRYPOINT_OFFSET(4, pFmod), rl_src1, rl_src2, false); } - rl_result = GetReturnWide(true); + rl_result = 
GetReturnWide(kFPReg); StoreValueWide(rl_dest, rl_result); return; case Instruction::NEG_DOUBLE: diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 48bff6e6af..1cc16b9e12 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -173,7 +173,10 @@ void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { RegLocation rl_result; RegLocation rl_src = mir_graph_->GetSrc(mir, 0); RegLocation rl_dest = mir_graph_->GetDest(mir); - rl_src = LoadValue(rl_src, kCoreReg); + // Avoid using float regs here. + RegisterClass src_reg_class = rl_src.ref ? kRefReg : kCoreReg; + RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg; + rl_src = LoadValue(rl_src, src_reg_class); ConditionCode ccode = mir->meta.ccode; // The kMirOpSelect has two variants, one for constants and one for moves. @@ -182,7 +185,7 @@ void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { if (is_constant_case) { int true_val = mir->dalvikInsn.vB; int false_val = mir->dalvikInsn.vC; - rl_result = EvalLoc(rl_dest, kCoreReg, true); + rl_result = EvalLoc(rl_dest, result_reg_class, true); /* * For ccode == kCondEq: @@ -203,6 +206,8 @@ void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { * mov t1, $true_case * cmovz result_reg, t1 */ + // FIXME: depending on how you use registers you could get a false != mismatch when dealing + // with different views of the same underlying physical resource (i.e. solo32 vs. solo64). const bool result_reg_same_as_src = (rl_src.location == kLocPhysReg && rl_src.reg.GetReg() == rl_result.reg.GetReg()); const bool true_zero_case = (true_val == 0 && false_val != 0 && !result_reg_same_as_src); @@ -224,7 +229,7 @@ void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { if (true_zero_case || false_zero_case || catch_all_case) { ConditionCode cc = true_zero_case ? NegateComparison(ccode) : ccode; int immediateForTemp = true_zero_case ? false_val : true_val; - RegStorage temp1_reg = AllocTemp(); + RegStorage temp1_reg = AllocTypedTemp(false, result_reg_class); OpRegImm(kOpMov, temp1_reg, immediateForTemp); OpCondRegReg(kOpCmov, cc, rl_result.reg, temp1_reg); @@ -234,9 +239,9 @@ void X86Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) { } else { RegLocation rl_true = mir_graph_->GetSrc(mir, 1); RegLocation rl_false = mir_graph_->GetSrc(mir, 2); - rl_true = LoadValue(rl_true, kCoreReg); - rl_false = LoadValue(rl_false, kCoreReg); - rl_result = EvalLoc(rl_dest, kCoreReg, true); + rl_true = LoadValue(rl_true, result_reg_class); + rl_false = LoadValue(rl_false, result_reg_class); + rl_result = EvalLoc(rl_dest, result_reg_class, true); /* * For ccode == kCondEq: @@ -792,8 +797,8 @@ bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) { Clobber(rs_r0); LockTemp(rs_r0); - RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg); - RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg); + RegLocation rl_object = LoadValue(rl_src_obj, kRefReg); + RegLocation rl_new_value = LoadValue(rl_src_new_value); if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) { // Mark card for object assuming new value is stored. 
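The recurring change in the hunks above is that values known to be object references are now loaded through the reference register class instead of a hard-coded kCoreReg. A hedged sketch of the selection rule as it is used here (illustrative helper, not part of the patch):

    // Object references are loaded into the dedicated reference class; other
    // 32-bit values keep using the core class, matching the rl_src.ref and
    // rl_dest.ref checks introduced above.
    RegisterClass RegClassForLoc(const RegLocation& loc) {
      return loc.ref ? kRefReg : kCoreReg;
    }
    // Usage mirroring GenSelect above:
    //   rl_src = LoadValue(rl_src, RegClassForLoc(rl_src));
    //   rl_result = EvalLoc(rl_dest, RegClassForLoc(rl_dest), true);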
@@ -1441,7 +1446,7 @@ void X86Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array, RegisterClass reg_class = RegClassBySize(size); int len_offset = mirror::Array::LengthOffset().Int32Value(); RegLocation rl_result; - rl_array = LoadValue(rl_array, kCoreReg); + rl_array = LoadValue(rl_array, kRefReg); int data_offset; if (size == k64 || size == kDouble) { @@ -1497,7 +1502,7 @@ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value(); } - rl_array = LoadValue(rl_array, kCoreReg); + rl_array = LoadValue(rl_array, kRefReg); bool constant_index = rl_index.is_const; int32_t constant_index_value = 0; if (!constant_index) { @@ -1880,7 +1885,7 @@ void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, // question with simple comparisons. Use compares to memory and SETEQ to optimize for x86. void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src) { - RegLocation object = LoadValue(rl_src, kCoreReg); + RegLocation object = LoadValue(rl_src, kRefReg); RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); RegStorage result_reg = rl_result.reg; @@ -1894,7 +1899,7 @@ void X86Mir2Lir::GenInstanceofFinal(bool use_declaring_class, uint32_t type_idx, LoadConstant(result_reg, 0); LIR* null_branchover = OpCmpImmBranch(kCondEq, object.reg, 0, NULL); - RegStorage check_class = AllocTypedTemp(false, kCoreReg); + RegStorage check_class = AllocTypedTemp(false, kRefReg); // If Method* is already in a register, we can save a copy. RegLocation rl_method = mir_graph_->GetMethodLoc(); @@ -1972,8 +1977,8 @@ void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_k LoadRefDisp(TargetReg(kArg1), mirror::ArtMethod::DexCacheResolvedTypesOffset().Int32Value(), class_reg); int32_t offset_of_type = - mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() + (sizeof(mirror::HeapReference<mirror::Class*>) - * type_idx); + mirror::Array::DataOffset(sizeof(mirror::HeapReference<mirror::Class*>)).Int32Value() + + (sizeof(mirror::HeapReference<mirror::Class*>) * type_idx); LoadRefDisp(class_reg, offset_of_type, class_reg); if (!can_assume_type_is_in_dex_cache) { // Need to test presence of type in dex cache at runtime. @@ -1992,7 +1997,7 @@ void X86Mir2Lir::GenInstanceofCallingHelper(bool needs_access_check, bool type_k } } /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */ - RegLocation rl_result = GetReturn(false); + RegLocation rl_result = GetReturn(kRefReg); // SETcc only works with EAX..EDX. 
DCHECK_LT(rl_result.reg.GetRegNum(), 4); diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 19ad2f8ff1..4d8fd1b283 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -26,124 +26,105 @@ namespace art { -static const RegStorage core_regs_arr_32[] = { +static constexpr RegStorage core_regs_arr_32[] = { rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI, }; -static const RegStorage core_regs_arr_64[] = { - rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_64, rs_rBP, rs_rSI, rs_rDI, +static constexpr RegStorage core_regs_arr_64[] = { + rs_rAX, rs_rCX, rs_rDX, rs_rBX, rs_rX86_SP_32, rs_rBP, rs_rSI, rs_rDI, #ifdef TARGET_REX_SUPPORT rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15 #endif }; -static const RegStorage core_regs_arr_64q[] = { +static constexpr RegStorage core_regs_arr_64q[] = { rs_r0q, rs_r1q, rs_r2q, rs_r3q, rs_rX86_SP_64, rs_r5q, rs_r6q, rs_r7q, #ifdef TARGET_REX_SUPPORT - rs_r8, rs_r9, rs_r10, rs_r11, rs_r12, rs_r13, rs_r14, rs_r15 + rs_r8q, rs_r9q, rs_r10q, rs_r11q, rs_r12q, rs_r13q, rs_r14q, rs_r15q #endif }; -static const RegStorage sp_regs_arr_32[] = { +static constexpr RegStorage sp_regs_arr_32[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, }; -static const RegStorage sp_regs_arr_64[] = { +static constexpr RegStorage sp_regs_arr_64[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, #ifdef TARGET_REX_SUPPORT rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15 #endif }; -static const RegStorage dp_regs_arr_32[] = { +static constexpr RegStorage dp_regs_arr_32[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, }; -static const RegStorage dp_regs_arr_64[] = { +static constexpr RegStorage dp_regs_arr_64[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, #ifdef TARGET_REX_SUPPORT rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15 #endif }; -static const RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32}; -static const RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_64}; -static const RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64}; -static const RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX}; -static const RegStorage core_temps_arr_64[] = { +static constexpr RegStorage reserved_regs_arr_32[] = {rs_rX86_SP_32}; +static constexpr RegStorage reserved_regs_arr_64[] = {rs_rX86_SP_32}; +static constexpr RegStorage reserved_regs_arr_64q[] = {rs_rX86_SP_64}; +static constexpr RegStorage core_temps_arr_32[] = {rs_rAX, rs_rCX, rs_rDX, rs_rBX}; +static constexpr RegStorage core_temps_arr_64[] = { rs_rAX, rs_rCX, rs_rDX, rs_rSI, rs_rDI, #ifdef TARGET_REX_SUPPORT rs_r8, rs_r9, rs_r10, rs_r11 #endif }; -static const RegStorage core_temps_arr_64q[] = { +static constexpr RegStorage core_temps_arr_64q[] = { rs_r0q, rs_r1q, rs_r2q, rs_r6q, rs_r7q, #ifdef TARGET_REX_SUPPORT rs_r8q, rs_r9q, rs_r10q, rs_r11q #endif }; -static const RegStorage sp_temps_arr_32[] = { +static constexpr RegStorage sp_temps_arr_32[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, }; -static const RegStorage sp_temps_arr_64[] = { +static constexpr RegStorage sp_temps_arr_64[] = { rs_fr0, rs_fr1, rs_fr2, rs_fr3, rs_fr4, rs_fr5, rs_fr6, rs_fr7, #ifdef TARGET_REX_SUPPORT rs_fr8, rs_fr9, rs_fr10, rs_fr11, rs_fr12, rs_fr13, rs_fr14, rs_fr15 #endif }; -static const RegStorage dp_temps_arr_32[] = { +static constexpr RegStorage dp_temps_arr_32[] = { rs_dr0, rs_dr1, 
rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, }; -static const RegStorage dp_temps_arr_64[] = { +static constexpr RegStorage dp_temps_arr_64[] = { rs_dr0, rs_dr1, rs_dr2, rs_dr3, rs_dr4, rs_dr5, rs_dr6, rs_dr7, #ifdef TARGET_REX_SUPPORT rs_dr8, rs_dr9, rs_dr10, rs_dr11, rs_dr12, rs_dr13, rs_dr14, rs_dr15 #endif }; -static const RegStorage xp_temps_arr_32[] = { +static constexpr RegStorage xp_temps_arr_32[] = { rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7, }; -static const RegStorage xp_temps_arr_64[] = { +static constexpr RegStorage xp_temps_arr_64[] = { rs_xr0, rs_xr1, rs_xr2, rs_xr3, rs_xr4, rs_xr5, rs_xr6, rs_xr7, #ifdef TARGET_REX_SUPPORT rs_xr8, rs_xr9, rs_xr10, rs_xr11, rs_xr12, rs_xr13, rs_xr14, rs_xr15 #endif }; -static const std::vector<RegStorage> empty_pool; -static const std::vector<RegStorage> core_regs_32(core_regs_arr_32, - core_regs_arr_32 + sizeof(core_regs_arr_32) / sizeof(core_regs_arr_32[0])); -static const std::vector<RegStorage> core_regs_64(core_regs_arr_64, - core_regs_arr_64 + sizeof(core_regs_arr_64) / sizeof(core_regs_arr_64[0])); -static const std::vector<RegStorage> core_regs_64q(core_regs_arr_64q, - core_regs_arr_64q + sizeof(core_regs_arr_64q) / sizeof(core_regs_arr_64q[0])); -static const std::vector<RegStorage> sp_regs_32(sp_regs_arr_32, - sp_regs_arr_32 + sizeof(sp_regs_arr_32) / sizeof(sp_regs_arr_32[0])); -static const std::vector<RegStorage> sp_regs_64(sp_regs_arr_64, - sp_regs_arr_64 + sizeof(sp_regs_arr_64) / sizeof(sp_regs_arr_64[0])); -static const std::vector<RegStorage> dp_regs_32(dp_regs_arr_32, - dp_regs_arr_32 + sizeof(dp_regs_arr_32) / sizeof(dp_regs_arr_32[0])); -static const std::vector<RegStorage> dp_regs_64(dp_regs_arr_64, - dp_regs_arr_64 + sizeof(dp_regs_arr_64) / sizeof(dp_regs_arr_64[0])); -static const std::vector<RegStorage> reserved_regs_32(reserved_regs_arr_32, - reserved_regs_arr_32 + sizeof(reserved_regs_arr_32) / sizeof(reserved_regs_arr_32[0])); -static const std::vector<RegStorage> reserved_regs_64(reserved_regs_arr_64, - reserved_regs_arr_64 + sizeof(reserved_regs_arr_64) / sizeof(reserved_regs_arr_64[0])); -static const std::vector<RegStorage> reserved_regs_64q(reserved_regs_arr_64q, - reserved_regs_arr_64q + sizeof(reserved_regs_arr_64q) / sizeof(reserved_regs_arr_64q[0])); -static const std::vector<RegStorage> core_temps_32(core_temps_arr_32, - core_temps_arr_32 + sizeof(core_temps_arr_32) / sizeof(core_temps_arr_32[0])); -static const std::vector<RegStorage> core_temps_64(core_temps_arr_64, - core_temps_arr_64 + sizeof(core_temps_arr_64) / sizeof(core_temps_arr_64[0])); -static const std::vector<RegStorage> core_temps_64q(core_temps_arr_64q, - core_temps_arr_64q + sizeof(core_temps_arr_64q) / sizeof(core_temps_arr_64q[0])); -static const std::vector<RegStorage> sp_temps_32(sp_temps_arr_32, - sp_temps_arr_32 + sizeof(sp_temps_arr_32) / sizeof(sp_temps_arr_32[0])); -static const std::vector<RegStorage> sp_temps_64(sp_temps_arr_64, - sp_temps_arr_64 + sizeof(sp_temps_arr_64) / sizeof(sp_temps_arr_64[0])); -static const std::vector<RegStorage> dp_temps_32(dp_temps_arr_32, - dp_temps_arr_32 + sizeof(dp_temps_arr_32) / sizeof(dp_temps_arr_32[0])); -static const std::vector<RegStorage> dp_temps_64(dp_temps_arr_64, - dp_temps_arr_64 + sizeof(dp_temps_arr_64) / sizeof(dp_temps_arr_64[0])); - -static const std::vector<RegStorage> xp_temps_32(xp_temps_arr_32, - xp_temps_arr_32 + sizeof(xp_temps_arr_32) / sizeof(xp_temps_arr_32[0])); -static const std::vector<RegStorage> xp_temps_64(xp_temps_arr_64, - 
xp_temps_arr_64 + sizeof(xp_temps_arr_64) / sizeof(xp_temps_arr_64[0])); +static constexpr ArrayRef<const RegStorage> empty_pool; +static constexpr ArrayRef<const RegStorage> core_regs_32(core_regs_arr_32); +static constexpr ArrayRef<const RegStorage> core_regs_64(core_regs_arr_64); +static constexpr ArrayRef<const RegStorage> core_regs_64q(core_regs_arr_64q); +static constexpr ArrayRef<const RegStorage> sp_regs_32(sp_regs_arr_32); +static constexpr ArrayRef<const RegStorage> sp_regs_64(sp_regs_arr_64); +static constexpr ArrayRef<const RegStorage> dp_regs_32(dp_regs_arr_32); +static constexpr ArrayRef<const RegStorage> dp_regs_64(dp_regs_arr_64); +static constexpr ArrayRef<const RegStorage> reserved_regs_32(reserved_regs_arr_32); +static constexpr ArrayRef<const RegStorage> reserved_regs_64(reserved_regs_arr_64); +static constexpr ArrayRef<const RegStorage> reserved_regs_64q(reserved_regs_arr_64q); +static constexpr ArrayRef<const RegStorage> core_temps_32(core_temps_arr_32); +static constexpr ArrayRef<const RegStorage> core_temps_64(core_temps_arr_64); +static constexpr ArrayRef<const RegStorage> core_temps_64q(core_temps_arr_64q); +static constexpr ArrayRef<const RegStorage> sp_temps_32(sp_temps_arr_32); +static constexpr ArrayRef<const RegStorage> sp_temps_64(sp_temps_arr_64); +static constexpr ArrayRef<const RegStorage> dp_temps_32(dp_temps_arr_32); +static constexpr ArrayRef<const RegStorage> dp_temps_64(dp_temps_arr_64); + +static constexpr ArrayRef<const RegStorage> xp_temps_32(xp_temps_arr_32); +static constexpr ArrayRef<const RegStorage> xp_temps_64(xp_temps_arr_64); RegStorage rs_rX86_SP; @@ -177,6 +158,11 @@ RegLocation X86Mir2Lir::LocCReturn() { return x86_loc_c_return; } +RegLocation X86Mir2Lir::LocCReturnRef() { + // FIXME: return x86_loc_c_return_wide for x86_64 when wide refs supported. + return x86_loc_c_return; +} + RegLocation X86Mir2Lir::LocCReturnWide() { return x86_loc_c_return_wide; } @@ -565,9 +551,9 @@ bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) { void X86Mir2Lir::CompilerInitializeRegAlloc() { if (Gen64Bit()) { - reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, empty_pool/*core_regs_64q*/, sp_regs_64, - dp_regs_64, reserved_regs_64, empty_pool/*reserved_regs_64q*/, - core_temps_64, empty_pool/*core_temps_64q*/, sp_temps_64, dp_temps_64); + reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_64, core_regs_64q, sp_regs_64, + dp_regs_64, reserved_regs_64, reserved_regs_64q, + core_temps_64, core_temps_64q, sp_temps_64, dp_temps_64); } else { reg_pool_ = new (arena_) RegisterPool(this, arena_, core_regs_32, empty_pool, sp_regs_32, dp_regs_32, reserved_regs_32, empty_pool, @@ -577,7 +563,7 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() { // Target-specific adjustments. // Add in XMM registers. - const std::vector<RegStorage> *xp_temps = Gen64Bit() ? &xp_temps_64 : &xp_temps_32; + const ArrayRef<const RegStorage> *xp_temps = Gen64Bit() ? &xp_temps_64 : &xp_temps_32; for (RegStorage reg : *xp_temps) { RegisterInfo* info = new (arena_) RegisterInfo(reg, GetRegMaskCommon(reg)); reginfo_map_.Put(reg.GetReg(), info); @@ -597,10 +583,28 @@ void X86Mir2Lir::CompilerInitializeRegAlloc() { // Redirect 32-bit vector's master storage to 128-bit vector. info->SetMaster(xp_reg_info); - RegStorage dp_reg = RegStorage::Solo64(RegStorage::kFloatingPoint | sp_reg_num); + RegStorage dp_reg = RegStorage::FloatSolo64(sp_reg_num); RegisterInfo* dp_reg_info = GetRegInfo(dp_reg); // Redirect 64-bit vector's master storage to 128-bit vector. 
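[Editor's sketch] The register-pool hunk above replaces file-static std::vector<RegStorage> pools, built at runtime from sizeof(arr)/sizeof(arr[0]) iterator pairs, with constexpr ArrayRef<const RegStorage> views whose length is deduced from the array type (the ArrayRef class itself is added later in this change under compiler/utils/array_ref.h). A minimal, purely illustrative stand-in for that pattern, with hypothetical names:

#include <cstddef>

// Illustrative ArrayRef-style view; not the real class, just the shape of it.
template <typename T>
class ConstSpan {
 public:
  constexpr ConstSpan() : data_(nullptr), size_(0u) {}
  // Length is deduced from the array type, so there is no sizeof(a)/sizeof(a[0])
  // boilerplate and no static constructor runs at program start-up.
  template <size_t n>
  constexpr ConstSpan(const T (&array)[n]) : data_(array), size_(n) {}
  constexpr const T* begin() const { return data_; }
  constexpr const T* end() const { return data_ + size_; }
  constexpr size_t size() const { return size_; }
 private:
  const T* data_;
  size_t size_;
};

static constexpr int core_regs_arr[] = {0, 1, 2, 3};
static constexpr ConstSpan<int> core_regs(core_regs_arr);  // compile-time view, size 4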
dp_reg_info->SetMaster(xp_reg_info); + // Singles should show a single 32-bit mask bit, at first referring to the low half. + DCHECK_EQ(info->StorageMask(), 0x1U); + } + + if (Gen64Bit()) { + // Alias 32bit W registers to corresponding 64bit X registers. + GrowableArray<RegisterInfo*>::Iterator w_it(®_pool_->core_regs_); + for (RegisterInfo* info = w_it.Next(); info != nullptr; info = w_it.Next()) { + int x_reg_num = info->GetReg().GetRegNum(); + RegStorage x_reg = RegStorage::Solo64(x_reg_num); + RegisterInfo* x_reg_info = GetRegInfo(x_reg); + // 64bit X register's master storage should refer to itself. + DCHECK_EQ(x_reg_info, x_reg_info->Master()); + // Redirect 32bit W master storage to 64bit X. + info->SetMaster(x_reg_info); + // 32bit W should show a single 32-bit mask bit, at first referring to the low half. + DCHECK_EQ(info->StorageMask(), 0x1U); + } } // Don't start allocating temps at r0/s0/d0 or you may clobber return regs in early-exit methods. @@ -981,7 +985,7 @@ bool X86Mir2Lir::GenInlinedIndexOf(CallInfo* info, bool zero_based) { } // Okay, we are commited to inlining this. - RegLocation rl_return = GetReturn(false); + RegLocation rl_return = GetReturn(kCoreReg); RegLocation rl_dest = InlineTarget(info); // Is the string non-NULL? diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index fed31c1f59..618b3a5987 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -89,7 +89,11 @@ LIR* X86Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) { res = NewLIR2(kX86Xor32RR, r_dest.GetReg(), r_dest.GetReg()); } else { // Note, there is no byte immediate form of a 32 bit immediate move. - res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value); + if (r_dest.Is64Bit()) { + res = NewLIR2(kX86Mov64RI, r_dest.GetReg(), value); + } else { + res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value); + } } if (r_dest_save.IsFloat()) { @@ -181,7 +185,6 @@ LIR* X86Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { LOG(FATAL) << "Bad case in OpRegImm " << op; } } - CHECK(!r_dest_src1.Is64Bit() || X86Mir2Lir::EncodingMap[opcode].kind == kReg64Imm) << "OpRegImm(" << op << ")"; return NewLIR2(opcode, r_dest_src1.GetReg(), value); } @@ -559,7 +562,7 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { // We don't know the proper offset for the value, so pick one that will force // 4 byte offset. We will fix this up in the assembler later to have the right // value. 
- res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::Solo64(low_reg_val), + res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val), kDouble); res->target = data_target; res->flags.fixup = kFixupLoad; @@ -569,7 +572,7 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { if (val_lo == 0) { res = NewLIR2(kX86XorpsRR, low_reg_val, low_reg_val); } else { - res = LoadConstantNoClobber(RegStorage::Solo32(low_reg_val), val_lo); + res = LoadConstantNoClobber(RegStorage::FloatSolo32(low_reg_val), val_lo); } if (val_hi != 0) { RegStorage r_dest_hi = AllocTempDouble(); @@ -866,7 +869,7 @@ void X86Mir2Lir::AnalyzeBB(BasicBlock * bb) { for (MIR *mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { int opcode = mir->dalvikInsn.opcode; - if (opcode >= kMirOpFirst) { + if (MIRGraph::IsPseudoMirOp(opcode)) { AnalyzeExtendedMIR(opcode, bb, mir); } else { AnalyzeMIR(opcode, bb, mir); diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index 964422ce4c..bb8df893f8 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -175,6 +175,16 @@ enum X86NativeRegisterPool { fr5 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 5, fr6 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 6, fr7 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 7, +#ifdef TARGET_REX_SUPPORT + fr8 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 8, + fr9 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 9, + fr10 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 10, + fr11 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 11, + fr12 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 12, + fr13 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 13, + fr14 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 14, + fr15 = RegStorage::k32BitSolo | RegStorage::kFloatingPoint | 15, +#endif // xmm registers, double precision aliases. dr0 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 0, @@ -185,8 +195,18 @@ enum X86NativeRegisterPool { dr5 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 5, dr6 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 6, dr7 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 7, +#ifdef TARGET_REX_SUPPORT + dr8 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 8, + dr9 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 9, + dr10 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 10, + dr11 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 11, + dr12 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 12, + dr13 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 13, + dr14 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 14, + dr15 = RegStorage::k64BitSolo | RegStorage::kFloatingPoint | 15, +#endif - // xmm registers aliases. 
+ // xmm registers, quad precision aliases xr0 = RegStorage::k128BitSolo | 0, xr1 = RegStorage::k128BitSolo | 1, xr2 = RegStorage::k128BitSolo | 2, @@ -195,6 +215,16 @@ enum X86NativeRegisterPool { xr5 = RegStorage::k128BitSolo | 5, xr6 = RegStorage::k128BitSolo | 6, xr7 = RegStorage::k128BitSolo | 7, +#ifdef TARGET_REX_SUPPORT + xr8 = RegStorage::k128BitSolo | 8, + xr9 = RegStorage::k128BitSolo | 9, + xr10 = RegStorage::k128BitSolo | 10, + xr11 = RegStorage::k128BitSolo | 11, + xr12 = RegStorage::k128BitSolo | 12, + xr13 = RegStorage::k128BitSolo | 13, + xr14 = RegStorage::k128BitSolo | 14, + xr15 = RegStorage::k128BitSolo | 15, +#endif // TODO: as needed, add 256, 512 and 1024-bit xmm views. }; @@ -251,6 +281,16 @@ constexpr RegStorage rs_fr4(RegStorage::kValid | fr4); constexpr RegStorage rs_fr5(RegStorage::kValid | fr5); constexpr RegStorage rs_fr6(RegStorage::kValid | fr6); constexpr RegStorage rs_fr7(RegStorage::kValid | fr7); +#ifdef TARGET_REX_SUPPORT +constexpr RegStorage rs_fr8(RegStorage::kValid | fr8); +constexpr RegStorage rs_fr9(RegStorage::kValid | fr9); +constexpr RegStorage rs_fr10(RegStorage::kValid | fr10); +constexpr RegStorage rs_fr11(RegStorage::kValid | fr11); +constexpr RegStorage rs_fr12(RegStorage::kValid | fr12); +constexpr RegStorage rs_fr13(RegStorage::kValid | fr13); +constexpr RegStorage rs_fr14(RegStorage::kValid | fr14); +constexpr RegStorage rs_fr15(RegStorage::kValid | fr15); +#endif constexpr RegStorage rs_dr0(RegStorage::kValid | dr0); constexpr RegStorage rs_dr1(RegStorage::kValid | dr1); @@ -260,6 +300,16 @@ constexpr RegStorage rs_dr4(RegStorage::kValid | dr4); constexpr RegStorage rs_dr5(RegStorage::kValid | dr5); constexpr RegStorage rs_dr6(RegStorage::kValid | dr6); constexpr RegStorage rs_dr7(RegStorage::kValid | dr7); +#ifdef TARGET_REX_SUPPORT +constexpr RegStorage rs_dr8(RegStorage::kValid | dr8); +constexpr RegStorage rs_dr9(RegStorage::kValid | dr9); +constexpr RegStorage rs_dr10(RegStorage::kValid | dr10); +constexpr RegStorage rs_dr11(RegStorage::kValid | dr11); +constexpr RegStorage rs_dr12(RegStorage::kValid | dr12); +constexpr RegStorage rs_dr13(RegStorage::kValid | dr13); +constexpr RegStorage rs_dr14(RegStorage::kValid | dr14); +constexpr RegStorage rs_dr15(RegStorage::kValid | dr15); +#endif constexpr RegStorage rs_xr0(RegStorage::kValid | xr0); constexpr RegStorage rs_xr1(RegStorage::kValid | xr1); @@ -269,6 +319,16 @@ constexpr RegStorage rs_xr4(RegStorage::kValid | xr4); constexpr RegStorage rs_xr5(RegStorage::kValid | xr5); constexpr RegStorage rs_xr6(RegStorage::kValid | xr6); constexpr RegStorage rs_xr7(RegStorage::kValid | xr7); +#ifdef TARGET_REX_SUPPORT +constexpr RegStorage rs_xr8(RegStorage::kValid | xr8); +constexpr RegStorage rs_xr9(RegStorage::kValid | xr9); +constexpr RegStorage rs_xr10(RegStorage::kValid | xr10); +constexpr RegStorage rs_xr11(RegStorage::kValid | xr11); +constexpr RegStorage rs_xr12(RegStorage::kValid | xr12); +constexpr RegStorage rs_xr13(RegStorage::kValid | xr13); +constexpr RegStorage rs_xr14(RegStorage::kValid | xr14); +constexpr RegStorage rs_xr15(RegStorage::kValid | xr15); +#endif extern X86NativeRegisterPool rX86_ARG0; extern X86NativeRegisterPool rX86_ARG1; @@ -351,10 +411,14 @@ enum X86OpCode { opcode ## 16RR, opcode ## 16RM, opcode ## 16RA, opcode ## 16RT, \ opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, opcode ## 16TI, \ opcode ## 16RI8, opcode ## 16MI8, opcode ## 16AI8, opcode ## 16TI8, \ - opcode ## 32MR, opcode ## 64MR, opcode ## 32AR, opcode ## 64AR, opcode ## 32TR, \ - opcode ## 
32RR, opcode ## 32RM, opcode ## 64RM, opcode ## 32RA, opcode ## 64RA, opcode ## 32RT, opcode ## 64RT, \ - opcode ## 32RI, opcode ## 64RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \ - opcode ## 32RI8, opcode ## 64RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8 + opcode ## 32MR, opcode ## 32AR, opcode ## 32TR, \ + opcode ## 32RR, opcode ## 32RM, opcode ## 32RA, opcode ## 32RT, \ + opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, opcode ## 32TI, \ + opcode ## 32RI8, opcode ## 32MI8, opcode ## 32AI8, opcode ## 32TI8, \ + opcode ## 64MR, opcode ## 64AR, opcode ## 64TR, \ + opcode ## 64RR, opcode ## 64RM, opcode ## 64RA, opcode ## 64RT, \ + opcode ## 64RI, opcode ## 64MI, opcode ## 64AI, opcode ## 64TI, \ + opcode ## 64RI8, opcode ## 64MI8, opcode ## 64AI8, opcode ## 64TI8 BinaryOpCode(kX86Add), BinaryOpCode(kX86Or), BinaryOpCode(kX86Adc), @@ -367,23 +431,32 @@ enum X86OpCode { kX86Imul16RRI, kX86Imul16RMI, kX86Imul16RAI, kX86Imul32RRI, kX86Imul32RMI, kX86Imul32RAI, kX86Imul32RRI8, kX86Imul32RMI8, kX86Imul32RAI8, + kX86Imul64RRI, kX86Imul64RMI, kX86Imul64RAI, + kX86Imul64RRI8, kX86Imul64RMI8, kX86Imul64RAI8, kX86Mov8MR, kX86Mov8AR, kX86Mov8TR, kX86Mov8RR, kX86Mov8RM, kX86Mov8RA, kX86Mov8RT, kX86Mov8RI, kX86Mov8MI, kX86Mov8AI, kX86Mov8TI, kX86Mov16MR, kX86Mov16AR, kX86Mov16TR, kX86Mov16RR, kX86Mov16RM, kX86Mov16RA, kX86Mov16RT, kX86Mov16RI, kX86Mov16MI, kX86Mov16AI, kX86Mov16TI, - kX86Mov32MR, kX86Mov64MR, kX86Mov32AR, kX86Mov64AR, kX86Mov32TR, - kX86Mov32RR, kX86Mov32RM, kX86Mov64RM, kX86Mov32RA, kX86Mov64RA, kX86Mov32RT, kX86Mov64RT, - kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI, kX86Mov64TI, + kX86Mov32MR, kX86Mov32AR, kX86Mov32TR, + kX86Mov32RR, kX86Mov32RM, kX86Mov32RA, kX86Mov32RT, + kX86Mov32RI, kX86Mov32MI, kX86Mov32AI, kX86Mov32TI, kX86Lea32RM, kX86Lea32RA, + kX86Mov64MR, kX86Mov64AR, kX86Mov64TR, + kX86Mov64RR, kX86Mov64RM, kX86Mov64RA, kX86Mov64RT, + kX86Mov64RI, kX86Mov64MI, kX86Mov64AI, kX86Mov64TI, + kX86Lea64RM, + kX86Lea64RA, // RRC - Register Register ConditionCode - cond_opcode reg1, reg2 // - lir operands - 0: reg1, 1: reg2, 2: CC kX86Cmov32RRC, + kX86Cmov64RRC, // RMC - Register Memory ConditionCode - cond_opcode reg1, [base + disp] // - lir operands - 0: reg1, 1: base, 2: disp 3: CC kX86Cmov32RMC, + kX86Cmov64RMC, // RC - Register CL - opcode reg, CL // - lir operands - 0: reg, 1: CL @@ -397,7 +470,9 @@ enum X86OpCode { opcode ## 16RI, opcode ## 16MI, opcode ## 16AI, \ opcode ## 16RC, opcode ## 16MC, opcode ## 16AC, \ opcode ## 32RI, opcode ## 32MI, opcode ## 32AI, \ - opcode ## 32RC, opcode ## 32MC, opcode ## 32AC + opcode ## 32RC, opcode ## 32MC, opcode ## 32AC, \ + opcode ## 64RI, opcode ## 64MI, opcode ## 64AI, \ + opcode ## 64RC, opcode ## 64MC, opcode ## 64AC BinaryShiftOpCode(kX86Rol), BinaryShiftOpCode(kX86Ror), BinaryShiftOpCode(kX86Rcl), @@ -411,12 +486,18 @@ enum X86OpCode { kX86Shld32MRI, kX86Shrd32RRI, kX86Shrd32MRI, + kX86Shld64RRI, + kX86Shld64MRI, + kX86Shrd64RRI, + kX86Shrd64MRI, #define UnaryOpcode(opcode, reg, mem, array) \ opcode ## 8 ## reg, opcode ## 8 ## mem, opcode ## 8 ## array, \ opcode ## 16 ## reg, opcode ## 16 ## mem, opcode ## 16 ## array, \ - opcode ## 32 ## reg, opcode ## 32 ## mem, opcode ## 32 ## array + opcode ## 32 ## reg, opcode ## 32 ## mem, opcode ## 32 ## array, \ + opcode ## 64 ## reg, opcode ## 64 ## mem, opcode ## 64 ## array UnaryOpcode(kX86Test, RI, MI, AI), kX86Test32RR, + kX86Test64RR, UnaryOpcode(kX86Not, R, M, A), UnaryOpcode(kX86Neg, R, M, A), UnaryOpcode(kX86Mul, DaR, DaM, DaA), @@ -544,20 +625,20 @@ enum 
X86OpCode { /* Instruction assembly field_loc kind */ enum X86EncodingKind { - kData, // Special case for raw data. - kNop, // Special case for variable length nop. - kNullary, // Opcode that takes no arguments. - kPrefix2Nullary, // Opcode that takes no arguments, but 2 prefixes. - kRegOpcode, // Shorter form of R instruction kind (opcode+rd) - kReg, kReg64, kMem, kArray, // R, M and A instruction kinds. - kMemReg, kMemReg64, kArrayReg, kArrayReg64, kThreadReg, // MR, AR and TR instruction kinds. - kRegReg, kRegMem, kRegArray, kRegThread, kReg64Thread, // RR, RM, RA and RT instruction kinds. - kRegRegStore, // RR following the store modrm reg-reg encoding rather than the load. - kRegImm, kReg64Imm, kMemImm, kArrayImm, kThreadImm, // RI, MI, AI and TI instruction kinds. - kRegRegImm, kRegMemImm, kRegArrayImm, // RRI, RMI and RAI instruction kinds. - kMovRegImm, // Shorter form move RI. - kRegRegImmRev, // RRI with first reg in r/m - kMemRegImm, // MRI instruction kinds. + kData, // Special case for raw data. + kNop, // Special case for variable length nop. + kNullary, // Opcode that takes no arguments. + kPrefix2Nullary, // Opcode that takes no arguments, but 2 prefixes. + kRegOpcode, // Shorter form of R instruction kind (opcode+rd) + kReg, kMem, kArray, // R, M and A instruction kinds. + kMemReg, kArrayReg, kThreadReg, // MR, AR and TR instruction kinds. + kRegReg, kRegMem, kRegArray, kRegThread, // RR, RM, RA and RT instruction kinds. + kRegRegStore, // RR following the store modrm reg-reg encoding rather than the load. + kRegImm, kMemImm, kArrayImm, kThreadImm, // RI, MI, AI and TI instruction kinds. + kRegRegImm, kRegMemImm, kRegArrayImm, // RRI, RMI and RAI instruction kinds. + kMovRegImm, // Shorter form move RI. + kRegRegImmRev, // RRI with first reg in r/m + kMemRegImm, // MRI instruction kinds. kShiftRegImm, kShiftMemImm, kShiftArrayImm, // Shift opcode with immediate. kShiftRegCl, kShiftMemCl, kShiftArrayCl, // Shift opcode with register CL. kRegRegReg, kRegRegMem, kRegRegArray, // RRR, RRM, RRA instruction kinds. @@ -607,6 +688,15 @@ struct X86EncodingMap { // 64 Bit Operand Size #define REX_W 0x48 // Extension of the ModR/M reg field +#define REX_R 0x44 +// Extension of the SIB index field +#define REX_X 0x42 +// Extension of the ModR/M r/m field, SIB base field, or Opcode reg field +#define REX_B 0x41 +// Mask extracting the least 3 bits of r0..r15 +#define kRegNumMask32 0x07 +// Value indicating that base or reg is not used +#define NO_REG 0 #define IS_SIMM8(v) ((-128 <= (v)) && ((v) <= 127)) #define IS_SIMM16(v) ((-32768 <= (v)) && ((v) <= 32767)) diff --git a/compiler/dex/reg_storage.h b/compiler/dex/reg_storage.h index 2f7e701219..7e50c311da 100644 --- a/compiler/dex/reg_storage.h +++ b/compiler/dex/reg_storage.h @@ -225,24 +225,6 @@ class RegStorage { return reg_ & kRegNumMask; } - // Aliased double to low single. - RegStorage DoubleToLowSingle() const { - DCHECK(IsDouble()); - return FloatSolo32(GetRegNum() << 1); - } - - // Aliased double to high single. - RegStorage DoubleToHighSingle() const { - DCHECK(IsDouble()); - return FloatSolo32((GetRegNum() << 1) + 1); - } - - // Single to aliased double. - RegStorage SingleToDouble() const { - DCHECK(IsSingle()); - return FloatSolo64(GetRegNum() >> 1); - } - // Is register number in 0..7? 
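[Editor's sketch] The REX_W/REX_R/REX_X/REX_B constants introduced above are the standard x86-64 REX prefix with one of its four low bits set; note the defines already fold in the fixed 0100 high nibble (0x48 = 0x40 | 8, 0x44 = 0x40 | 4, and so on). A rough sketch of how such a prefix is typically assembled when an operand uses one of the extended registers r8-r15, with hypothetical helper names:

#include <cstdint>

constexpr uint8_t kRexBase = 0x40;  // fixed 0100 high nibble
constexpr uint8_t kRexW = 0x08;     // 64-bit operand size
constexpr uint8_t kRexR = 0x04;     // extends ModRM.reg
constexpr uint8_t kRexX = 0x02;     // extends SIB.index
constexpr uint8_t kRexB = 0x01;     // extends ModRM.rm / SIB.base

// Returns the REX byte for the given operands; a real encoder would omit the
// prefix entirely when this returns the bare 0x40 and it is not otherwise needed.
uint8_t BuildRex(bool wide, int reg, int index, int base) {
  uint8_t rex = kRexBase;
  if (wide)       rex |= kRexW;
  if (reg >= 8)   rex |= kRexR;
  if (index >= 8) rex |= kRexX;
  if (base >= 8)  rex |= kRexB;
  return rex;  // only the low 3 bits of each register number go into ModRM/SIB (cf. kRegNumMask32 == 0x07)
}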
bool Low8() const { return GetRegNum() < 8; diff --git a/compiler/dex/ssa_transformation.cc b/compiler/dex/ssa_transformation.cc index 0c5a4ca1d1..bd6bc22531 100644 --- a/compiler/dex/ssa_transformation.cc +++ b/compiler/dex/ssa_transformation.cc @@ -244,9 +244,9 @@ bool MIRGraph::ComputeDominanceFrontier(BasicBlock* bb) { /* Calculate DF_up */ for (uint32_t dominated_idx : bb->i_dominated->Indexes()) { - BasicBlock *dominated_bb = GetBasicBlock(dominated_idx); + BasicBlock* dominated_bb = GetBasicBlock(dominated_idx); for (uint32_t df_up_block_idx : dominated_bb->dom_frontier->Indexes()) { - BasicBlock *df_up_block = GetBasicBlock(df_up_block_idx); + BasicBlock* df_up_block = GetBasicBlock(df_up_block_idx); CheckForDominanceFrontier(bb, df_up_block); } } diff --git a/compiler/dex/vreg_analysis.cc b/compiler/dex/vreg_analysis.cc index 95b3d86d5f..c4af9cb55c 100644 --- a/compiler/dex/vreg_analysis.cc +++ b/compiler/dex/vreg_analysis.cc @@ -233,8 +233,7 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { // Special-case handling for format 35c/3rc invokes Instruction::Code opcode = mir->dalvikInsn.opcode; - int flags = (static_cast<int>(opcode) >= kNumPackedOpcodes) - ? 0 : Instruction::FlagsOf(mir->dalvikInsn.opcode); + int flags = IsPseudoMirOp(opcode) ? 0 : Instruction::FlagsOf(mir->dalvikInsn.opcode); if ((flags & Instruction::kInvoke) && (attrs & (DF_FORMAT_35C | DF_FORMAT_3RC))) { DCHECK_EQ(next, 0); @@ -317,8 +316,7 @@ bool MIRGraph::InferTypeAndSize(BasicBlock* bb, MIR* mir, bool changed) { * The Phi set will include all low words or all high * words, so we have to treat them specially. */ - bool is_phi = (static_cast<int>(mir->dalvikInsn.opcode) == - kMirOpPhi); + bool is_phi = (static_cast<int>(mir->dalvikInsn.opcode) == kMirOpPhi); RegLocation rl_temp = reg_location_[defs[0]]; bool defined_fp = rl_temp.defined && rl_temp.fp; bool defined_core = rl_temp.defined && rl_temp.core; @@ -425,6 +423,9 @@ void MIRGraph::InitRegLocations() { loc[ct->s_reg_low].defined = true; } + /* Treat Method* as a normal reference */ + loc[GetMethodSReg()].ref = true; + reg_location_ = loc; int num_regs = cu_->num_dalvik_registers; diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 330456180b..8d4e2838a1 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -1135,7 +1135,7 @@ void CompilerDriver::GetCodeAndMethodForDirectCall(InvokeType* type, InvokeType if (dex_method_idx != DexFile::kDexNoIndex) { target_method->dex_method_index = dex_method_idx; } else { - if (compiling_boot) { + if (compiling_boot && !use_dex_cache) { target_method->dex_method_index = method->GetDexMethodIndex(); target_method->dex_file = method->GetDeclaringClass()->GetDexCache()->GetDexFile(); } diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc index 964dfeb5b1..ca956aac36 100644 --- a/compiler/driver/compiler_driver_test.cc +++ b/compiler/driver/compiler_driver_test.cc @@ -173,7 +173,10 @@ TEST_F(CompilerDriverTest, AbstractMethodErrorStub) { env_->ExceptionClear(); jclass jlame = env_->FindClass("java/lang/AbstractMethodError"); EXPECT_TRUE(env_->IsInstanceOf(exception, jlame)); - Thread::Current()->ClearException(); + { + ScopedObjectAccess soa(Thread::Current()); + Thread::Current()->ClearException(); + } } // TODO: need check-cast test (when stub complete & we can throw/catch diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc index e88ed42380..09f2eaea20 
100644 --- a/compiler/elf_writer_quick.cc +++ b/compiler/elf_writer_quick.cc @@ -739,8 +739,8 @@ bool ElfWriterQuick::Write(OatWriter* oat_writer, << " for " << elf_file_->GetPath(); return false; } - BufferedOutputStream output_stream(new FileOutputStream(elf_file_)); - if (!oat_writer->Write(&output_stream)) { + std::unique_ptr<BufferedOutputStream> output_stream(new BufferedOutputStream(new FileOutputStream(elf_file_))); + if (!oat_writer->Write(output_stream.get())) { PLOG(ERROR) << "Failed to write .rodata and .text for " << elf_file_->GetPath(); return false; } diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index be53926ac8..ca1239f18d 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -650,34 +650,55 @@ void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) { copy->SetEntryPointFromInterpreter<kVerifyNone>(reinterpret_cast<EntryPointFromInterpreter*> (const_cast<byte*>(GetOatAddress(interpreter_to_interpreter_bridge_offset_)))); } else { - copy->SetEntryPointFromInterpreter<kVerifyNone>(reinterpret_cast<EntryPointFromInterpreter*> - (const_cast<byte*>(GetOatAddress(interpreter_to_compiled_code_bridge_offset_)))); // Use original code if it exists. Otherwise, set the code pointer to the resolution // trampoline. + + // Quick entrypoint: const byte* quick_code = GetOatAddress(orig->GetQuickOatCodeOffset()); + bool quick_is_interpreted = false; if (quick_code != nullptr && (!orig->IsStatic() || orig->IsConstructor() || orig->GetDeclaringClass()->IsInitialized())) { // We have code for a non-static or initialized method, just use the code. - copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(quick_code); } else if (quick_code == nullptr && orig->IsNative() && (!orig->IsStatic() || orig->GetDeclaringClass()->IsInitialized())) { // Non-static or initialized native method missing compiled code, use generic JNI version. - copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_generic_jni_trampoline_offset_)); + quick_code = GetOatAddress(quick_generic_jni_trampoline_offset_); } else if (quick_code == nullptr && !orig->IsNative()) { // We don't have code at all for a non-native method, use the interpreter. - copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_to_interpreter_bridge_offset_)); + quick_code = GetOatAddress(quick_to_interpreter_bridge_offset_); + quick_is_interpreted = true; } else { CHECK(!orig->GetDeclaringClass()->IsInitialized()); // We have code for a static method, but need to go through the resolution stub for class // initialization. - copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(GetOatAddress(quick_resolution_trampoline_offset_)); + quick_code = GetOatAddress(quick_resolution_trampoline_offset_); } + copy->SetEntryPointFromQuickCompiledCode<kVerifyNone>(quick_code); + + // Portable entrypoint: const byte* portable_code = GetOatAddress(orig->GetPortableOatCodeOffset()); - if (portable_code != nullptr) { - copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(portable_code); + bool portable_is_interpreted = false; + if (portable_code != nullptr && + (!orig->IsStatic() || orig->IsConstructor() || orig->GetDeclaringClass()->IsInitialized())) { + // We have code for a non-static or initialized method, just use the code. + } else if (portable_code == nullptr && orig->IsNative() && + (!orig->IsStatic() || orig->GetDeclaringClass()->IsInitialized())) { + // Non-static or initialized native method missing compiled code, use generic JNI version. 
+ // TODO: generic JNI support for LLVM. + portable_code = GetOatAddress(portable_resolution_trampoline_offset_); + } else if (portable_code == nullptr && !orig->IsNative()) { + // We don't have code at all for a non-native method, use the interpreter. + portable_code = GetOatAddress(portable_to_interpreter_bridge_offset_); + portable_is_interpreted = true; } else { - copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(GetOatAddress(portable_resolution_trampoline_offset_)); + CHECK(!orig->GetDeclaringClass()->IsInitialized()); + // We have code for a static method, but need to go through the resolution stub for class + // initialization. + portable_code = GetOatAddress(portable_resolution_trampoline_offset_); } + copy->SetEntryPointFromPortableCompiledCode<kVerifyNone>(portable_code); + + // JNI entrypoint: if (orig->IsNative()) { // The native method's pointer is set to a stub to lookup via dlsym. // Note this is not the code_ pointer, that is handled above. @@ -688,6 +709,15 @@ void ImageWriter::FixupMethod(ArtMethod* orig, ArtMethod* copy) { const byte* native_gc_map = GetOatAddress(native_gc_map_offset); copy->SetNativeGcMap<kVerifyNone>(reinterpret_cast<const uint8_t*>(native_gc_map)); } + + // Interpreter entrypoint: + // Set the interpreter entrypoint depending on whether there is compiled code or not. + uint32_t interpreter_code = (quick_is_interpreted && portable_is_interpreted) + ? interpreter_to_interpreter_bridge_offset_ + : interpreter_to_compiled_code_bridge_offset_; + copy->SetEntryPointFromInterpreter<kVerifyNone>( + reinterpret_cast<EntryPointFromInterpreter*>( + const_cast<byte*>(GetOatAddress(interpreter_code)))); } } } @@ -742,30 +772,42 @@ void ImageWriter::PatchOatCodeAndMethods() { const CompilerDriver::CallPatchInformation* patch = code_to_patch[i]; ArtMethod* target = GetTargetMethod(patch); uintptr_t quick_code = reinterpret_cast<uintptr_t>(class_linker->GetQuickOatCodeFor(target)); + DCHECK_NE(quick_code, 0U) << PrettyMethod(target); uintptr_t code_base = reinterpret_cast<uintptr_t>(&oat_file_->GetOatHeader()); uintptr_t code_offset = quick_code - code_base; + bool is_quick_offset = false; + if (quick_code == reinterpret_cast<uintptr_t>(GetQuickToInterpreterBridge())) { + is_quick_offset = true; + code_offset = quick_to_interpreter_bridge_offset_; + } else if (quick_code == + reinterpret_cast<uintptr_t>(class_linker->GetQuickGenericJniTrampoline())) { + CHECK(target->IsNative()); + is_quick_offset = true; + code_offset = quick_generic_jni_trampoline_offset_; + } + uintptr_t value; if (patch->IsRelative()) { // value to patch is relative to the location being patched const void* quick_oat_code = class_linker->GetQuickOatCodeFor(patch->GetDexFile(), patch->GetReferrerClassDefIdx(), patch->GetReferrerMethodIdx()); + if (is_quick_offset) { + // If its a quick offset it means that we are doing a relative patch from the class linker + // oat_file to the image writer oat_file so we need to adjust the quick oat code to be the + // one in the image writer oat_file. 
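[Editor's sketch] The reworked FixupMethod() above now computes a single quick_code value through a fallback chain (compiled code, generic JNI trampoline, interpreter bridge, or resolution trampoline), mirrors the same chain for the portable entry point, and only afterwards picks the interpreter entry point depending on whether both paths ended up interpreted. A condensed sketch of that selection order; the *_code parameters stand in for oat addresses and are not the runtime's real accessors:

// Condensed sketch of the quick-entrypoint fallback order used above.
const uint8_t* SelectQuickEntry(const uint8_t* compiled, bool is_static, bool is_native,
                                bool is_constructor, bool class_initialized,
                                const uint8_t* generic_jni, const uint8_t* to_interpreter,
                                const uint8_t* resolution, bool* interpreted) {
  *interpreted = false;
  if (compiled != nullptr && (!is_static || is_constructor || class_initialized)) {
    return compiled;                    // non-static or initialized method: use its code
  }
  if (compiled == nullptr && is_native && (!is_static || class_initialized)) {
    return generic_jni;                 // native method without code: generic JNI stub
  }
  if (compiled == nullptr && !is_native) {
    *interpreted = true;
    return to_interpreter;              // no code at all: go through the interpreter
  }
  return resolution;                    // static method still needing class initialization
}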
+ quick_code = PointerToLowMemUInt32(GetOatAddress(code_offset)); + quick_oat_code = + reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(quick_oat_code) + + reinterpret_cast<uintptr_t>(oat_data_begin_) - code_base); + } uintptr_t base = reinterpret_cast<uintptr_t>(quick_oat_code); uintptr_t patch_location = base + patch->GetLiteralOffset(); - uintptr_t value = quick_code - patch_location + patch->RelativeOffset(); - SetPatchLocation(patch, value); + value = quick_code - patch_location + patch->RelativeOffset(); } else { - if (quick_code == reinterpret_cast<uintptr_t>(GetQuickToInterpreterBridge()) || - quick_code == reinterpret_cast<uintptr_t>(class_linker->GetQuickGenericJniTrampoline())) { - if (target->IsNative()) { - // generic JNI, not interpreter bridge from GetQuickOatCodeFor(). - code_offset = quick_generic_jni_trampoline_offset_; - } else { - code_offset = quick_to_interpreter_bridge_offset_; - } - } - SetPatchLocation(patch, PointerToLowMemUInt32(GetOatAddress(code_offset))); + value = PointerToLowMemUInt32(GetOatAddress(code_offset)); } + SetPatchLocation(patch, value); } const CallPatches& methods_to_patch = compiler_driver_.GetMethodsToPatch(); diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc index 9927fe1aa3..8f4eddbea3 100644 --- a/compiler/jni/jni_compiler_test.cc +++ b/compiler/jni/jni_compiler_test.cc @@ -60,7 +60,7 @@ class JniCompilerTest : public CommonCompilerTest { } else { method = c->FindVirtualMethod(method_name, method_sig); } - ASSERT_TRUE(method != NULL) << method_name << " " << method_sig; + ASSERT_TRUE(method != nullptr) << method_name << " " << method_sig; if (method->GetEntryPointFromQuickCompiledCode() == nullptr) { ASSERT_TRUE(method->GetEntryPointFromPortableCompiledCode() == nullptr); CompileMethod(method); @@ -88,16 +88,16 @@ class JniCompilerTest : public CommonCompilerTest { // JNI operations after runtime start. 
env_ = Thread::Current()->GetJniEnv(); jklass_ = env_->FindClass("MyClassNatives"); - ASSERT_TRUE(jklass_ != NULL) << method_name << " " << method_sig; + ASSERT_TRUE(jklass_ != nullptr) << method_name << " " << method_sig; if (direct) { jmethod_ = env_->GetStaticMethodID(jklass_, method_name, method_sig); } else { jmethod_ = env_->GetMethodID(jklass_, method_name, method_sig); } - ASSERT_TRUE(jmethod_ != NULL) << method_name << " " << method_sig; + ASSERT_TRUE(jmethod_ != nullptr) << method_name << " " << method_sig; - if (native_fnptr != NULL) { + if (native_fnptr != nullptr) { JNINativeMethod methods[] = { { method_name, method_sig, native_fnptr } }; ASSERT_EQ(JNI_OK, env_->RegisterNatives(jklass_, methods, 1)) << method_name << " " << method_sig; @@ -107,7 +107,7 @@ class JniCompilerTest : public CommonCompilerTest { jmethodID constructor = env_->GetMethodID(jklass_, "<init>", "()V"); jobj_ = env_->NewObject(jklass_, constructor); - ASSERT_TRUE(jobj_ != NULL) << method_name << " " << method_sig; + ASSERT_TRUE(jobj_ != nullptr) << method_name << " " << method_sig; } public: @@ -125,13 +125,14 @@ jclass JniCompilerTest::jklass_; jobject JniCompilerTest::jobj_; jobject JniCompilerTest::class_loader_; + int gJava_MyClassNatives_foo_calls = 0; void Java_MyClassNatives_foo(JNIEnv* env, jobject thisObj) { // 1 = thisObj EXPECT_EQ(kNative, Thread::Current()->GetState()); Locks::mutator_lock_->AssertNotHeld(Thread::Current()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_foo_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -151,8 +152,8 @@ TEST_F(JniCompilerTest, CompileAndRunNoArgMethod) { TEST_F(JniCompilerTest, CompileAndRunIntMethodThroughStub) { TEST_DISABLED_FOR_PORTABLE(); - SetUpForTest(false, "bar", "(I)I", - NULL /* calling through stub will link with &Java_MyClassNatives_bar */); + SetUpForTest(false, "bar", "(I)I", nullptr); + // calling through stub will link with &Java_MyClassNatives_bar ScopedObjectAccess soa(Thread::Current()); std::string reason; @@ -168,8 +169,8 @@ TEST_F(JniCompilerTest, CompileAndRunIntMethodThroughStub) { TEST_F(JniCompilerTest, CompileAndRunStaticIntMethodThroughStub) { TEST_DISABLED_FOR_PORTABLE(); - SetUpForTest(true, "sbar", "(I)I", - NULL /* calling through stub will link with &Java_MyClassNatives_sbar */); + SetUpForTest(true, "sbar", "(I)I", nullptr); + // calling through stub will link with &Java_MyClassNatives_sbar ScopedObjectAccess soa(Thread::Current()); std::string reason; @@ -188,7 +189,7 @@ jint Java_MyClassNatives_fooI(JNIEnv* env, jobject thisObj, jint x) { // 1 = thisObj EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_fooI_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -215,7 +216,7 @@ jint Java_MyClassNatives_fooII(JNIEnv* env, jobject thisObj, jint x, jint y) { // 1 = thisObj EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_fooII_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -243,7 +244,7 @@ jlong Java_MyClassNatives_fooJJ(JNIEnv* env, jobject thisObj, jlong 
x, jlong y) // 1 = thisObj EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_fooJJ_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -272,7 +273,7 @@ jdouble Java_MyClassNatives_fooDD(JNIEnv* env, jobject thisObj, jdouble x, jdoub // 1 = thisObj EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_fooDD_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -302,7 +303,7 @@ jlong Java_MyClassNatives_fooJJ_synchronized(JNIEnv* env, jobject thisObj, jlong // 1 = thisObj EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_fooJJ_synchronized_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -329,7 +330,7 @@ jobject Java_MyClassNatives_fooIOO(JNIEnv* env, jobject thisObj, jint x, jobject // 3 = this + y + z EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); gJava_MyClassNatives_fooIOO_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -353,28 +354,28 @@ TEST_F(JniCompilerTest, CompileAndRunIntObjectObjectMethod) { reinterpret_cast<void*>(&Java_MyClassNatives_fooIOO)); EXPECT_EQ(0, gJava_MyClassNatives_fooIOO_calls); - jobject result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, NULL, NULL); + jobject result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, nullptr, nullptr); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(1, gJava_MyClassNatives_fooIOO_calls); - result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, NULL, jklass_); + result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, nullptr, jklass_); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(2, gJava_MyClassNatives_fooIOO_calls); - result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, NULL, jklass_); - EXPECT_TRUE(env_->IsSameObject(NULL, result)); + result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, nullptr, jklass_); + EXPECT_TRUE(env_->IsSameObject(nullptr, result)); EXPECT_EQ(3, gJava_MyClassNatives_fooIOO_calls); - result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, NULL, jklass_); + result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, nullptr, jklass_); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(4, gJava_MyClassNatives_fooIOO_calls); - result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, jklass_, NULL); + result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, jklass_, nullptr); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(5, gJava_MyClassNatives_fooIOO_calls); - result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, jklass_, NULL); + result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, jklass_, nullptr); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(6, 
gJava_MyClassNatives_fooIOO_calls); - result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, jklass_, NULL); - EXPECT_TRUE(env_->IsSameObject(NULL, result)); + result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, jklass_, nullptr); + EXPECT_TRUE(env_->IsSameObject(nullptr, result)); EXPECT_EQ(7, gJava_MyClassNatives_fooIOO_calls); } @@ -383,7 +384,7 @@ jint Java_MyClassNatives_fooSII(JNIEnv* env, jclass klass, jint x, jint y) { // 1 = klass EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(klass != NULL); + EXPECT_TRUE(klass != nullptr); EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass)); gJava_MyClassNatives_fooSII_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -407,7 +408,7 @@ jdouble Java_MyClassNatives_fooSDD(JNIEnv* env, jclass klass, jdouble x, jdouble // 1 = klass EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(klass != NULL); + EXPECT_TRUE(klass != nullptr); EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass)); gJava_MyClassNatives_fooSDD_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -437,7 +438,7 @@ jobject Java_MyClassNatives_fooSIOO(JNIEnv* env, jclass klass, jint x, jobject y // 3 = klass + y + z EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(klass != NULL); + EXPECT_TRUE(klass != nullptr); EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass)); gJava_MyClassNatives_fooSIOO_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -462,28 +463,28 @@ TEST_F(JniCompilerTest, CompileAndRunStaticIntObjectObjectMethod) { reinterpret_cast<void*>(&Java_MyClassNatives_fooSIOO)); EXPECT_EQ(0, gJava_MyClassNatives_fooSIOO_calls); - jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, NULL, NULL); + jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, nullptr); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(1, gJava_MyClassNatives_fooSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, NULL, jobj_); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, jobj_); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(2, gJava_MyClassNatives_fooSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, NULL, jobj_); - EXPECT_TRUE(env_->IsSameObject(NULL, result)); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, nullptr, jobj_); + EXPECT_TRUE(env_->IsSameObject(nullptr, result)); EXPECT_EQ(3, gJava_MyClassNatives_fooSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, NULL, jobj_); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, nullptr, jobj_); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(4, gJava_MyClassNatives_fooSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, NULL); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, nullptr); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(5, gJava_MyClassNatives_fooSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, NULL); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, nullptr); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(6, gJava_MyClassNatives_fooSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, NULL); - EXPECT_TRUE(env_->IsSameObject(NULL, result)); + result 
= env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, nullptr); + EXPECT_TRUE(env_->IsSameObject(nullptr, result)); EXPECT_EQ(7, gJava_MyClassNatives_fooSIOO_calls); } @@ -492,7 +493,7 @@ jobject Java_MyClassNatives_fooSSIOO(JNIEnv* env, jclass klass, jint x, jobject // 3 = klass + y + z EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(klass != NULL); + EXPECT_TRUE(klass != nullptr); EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass)); gJava_MyClassNatives_fooSSIOO_calls++; ScopedObjectAccess soa(Thread::Current()); @@ -516,28 +517,28 @@ TEST_F(JniCompilerTest, CompileAndRunStaticSynchronizedIntObjectObjectMethod) { reinterpret_cast<void*>(&Java_MyClassNatives_fooSSIOO)); EXPECT_EQ(0, gJava_MyClassNatives_fooSSIOO_calls); - jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, NULL, NULL); + jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, nullptr); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(1, gJava_MyClassNatives_fooSSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, NULL, jobj_); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, jobj_); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(2, gJava_MyClassNatives_fooSSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, NULL, jobj_); - EXPECT_TRUE(env_->IsSameObject(NULL, result)); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, nullptr, jobj_); + EXPECT_TRUE(env_->IsSameObject(nullptr, result)); EXPECT_EQ(3, gJava_MyClassNatives_fooSSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, NULL, jobj_); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, nullptr, jobj_); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(4, gJava_MyClassNatives_fooSSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, NULL); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, nullptr); EXPECT_TRUE(env_->IsSameObject(jklass_, result)); EXPECT_EQ(5, gJava_MyClassNatives_fooSSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, NULL); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, nullptr); EXPECT_TRUE(env_->IsSameObject(jobj_, result)); EXPECT_EQ(6, gJava_MyClassNatives_fooSSIOO_calls); - result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, NULL); - EXPECT_TRUE(env_->IsSameObject(NULL, result)); + result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, nullptr); + EXPECT_TRUE(env_->IsSameObject(nullptr, result)); EXPECT_EQ(7, gJava_MyClassNatives_fooSSIOO_calls); } @@ -591,7 +592,7 @@ TEST_F(JniCompilerTest, ExceptionHandling) { jint Java_MyClassNatives_nativeUpCall(JNIEnv* env, jobject thisObj, jint i) { if (i <= 0) { - // We want to check raw Object*/Array* below + // We want to check raw Object* / Array* below ScopedObjectAccess soa(env); // Build stack trace @@ -599,7 +600,7 @@ jint Java_MyClassNatives_nativeUpCall(JNIEnv* env, jobject thisObj, jint i) { jobjectArray ste_array = Thread::InternalStackTraceToStackTraceElementArray(soa, internal); mirror::ObjectArray<mirror::StackTraceElement>* trace_array = soa.Decode<mirror::ObjectArray<mirror::StackTraceElement>*>(ste_array); - EXPECT_TRUE(trace_array != NULL); + EXPECT_TRUE(trace_array != nullptr); EXPECT_EQ(11, trace_array->GetLength()); // Check stack trace entries have expected values @@ -615,9 +616,9 @@ jint 
Java_MyClassNatives_nativeUpCall(JNIEnv* env, jobject thisObj, jint i) { return 0; } else { jclass jklass = env->FindClass("MyClassNatives"); - EXPECT_TRUE(jklass != NULL); + EXPECT_TRUE(jklass != nullptr); jmethodID jmethod = env->GetMethodID(jklass, "fooI", "(I)I"); - EXPECT_TRUE(jmethod != NULL); + EXPECT_TRUE(jmethod != nullptr); // Recurse with i - 1 jint result = env->CallNonvirtualIntMethod(thisObj, jklass, jmethod, i - 1); @@ -721,7 +722,7 @@ TEST_F(JniCompilerTest, GetText) { TEST_F(JniCompilerTest, GetSinkPropertiesNative) { TEST_DISABLED_FOR_PORTABLE(); - SetUpForTest(false, "getSinkPropertiesNative", "(Ljava/lang/String;)[Ljava/lang/Object;", NULL); + SetUpForTest(false, "getSinkPropertiesNative", "(Ljava/lang/String;)[Ljava/lang/Object;", nullptr); // This space intentionally left blank. Just testing compilation succeeds. } @@ -804,7 +805,7 @@ TEST_F(JniCompilerTest, UpcallArgumentTypeChecking_Static) { jfloat Java_MyClassNatives_checkFloats(JNIEnv* env, jobject thisObj, jfloat f1, jfloat f2) { EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); ScopedObjectAccess soa(Thread::Current()); EXPECT_EQ(1U, Thread::Current()->NumStackReferences()); @@ -826,12 +827,12 @@ TEST_F(JniCompilerTest, CompileAndRunFloatFloatMethod) { } void Java_MyClassNatives_checkParameterAlign(JNIEnv* env, jobject thisObj, jint i1, jlong l1) { - /*EXPECT_EQ(kNative, Thread::Current()->GetState()); - EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); - EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); - ScopedObjectAccess soa(Thread::Current()); - EXPECT_EQ(1U, Thread::Current()->NumStackReferences());*/ +// EXPECT_EQ(kNative, Thread::Current()->GetState()); +// EXPECT_EQ(Thread::Current()->GetJniEnv(), env); +// EXPECT_TRUE(thisObj != nullptr); +// EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); +// ScopedObjectAccess soa(Thread::Current()); +// EXPECT_EQ(1U, Thread::Current()->NumStackReferences()); EXPECT_EQ(i1, 1234); EXPECT_EQ(l1, INT64_C(0x12345678ABCDEF0)); } @@ -879,7 +880,7 @@ void Java_MyClassNatives_maxParamNumber(JNIEnv* env, jobject thisObj, jobject o248, jobject o249, jobject o250, jobject o251, jobject o252, jobject o253) { EXPECT_EQ(kNative, Thread::Current()->GetState()); EXPECT_EQ(Thread::Current()->GetJniEnv(), env); - EXPECT_TRUE(thisObj != NULL); + EXPECT_TRUE(thisObj != nullptr); EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_)); ScopedObjectAccess soa(Thread::Current()); EXPECT_GE(255U, Thread::Current()->NumStackReferences()); diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc index 649a80ff68..f0c0ed72bf 100644 --- a/compiler/jni/quick/arm/calling_convention_arm.cc +++ b/compiler/jni/quick/arm/calling_convention_arm.cc @@ -143,9 +143,10 @@ ManagedRegister ArmJniCallingConvention::ReturnScratchRegister() const { size_t ArmJniCallingConvention::FrameSize() { // Method*, LR and callee save area size, local reference segment state - size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize; + size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) + + (2 + CalleeSaveRegisters().size()) * kFramePointerSize; // References plus 2 words for HandleScope header - size_t handle_scope_size = 
HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount()); + size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount()); // Plus return value spill area size return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment); } diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc index ffd27ee37d..0a00d7d8ac 100644 --- a/compiler/jni/quick/arm64/calling_convention_arm64.cc +++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc @@ -95,7 +95,7 @@ FrameOffset Arm64ManagedRuntimeCallingConvention::CurrentParamStackOffset() { CHECK(IsCurrentParamOnStack()); FrameOffset result = FrameOffset(displacement_.Int32Value() + // displacement - kFramePointerSize + // Method* + sizeof(StackReference<mirror::ArtMethod>) + // Method ref (itr_slots_ * sizeof(uint32_t))); // offset into in args return result; } @@ -196,9 +196,10 @@ ManagedRegister Arm64JniCallingConvention::ReturnScratchRegister() const { size_t Arm64JniCallingConvention::FrameSize() { // Method*, callee save area size, local reference segment state - size_t frame_data_size = ((1 + CalleeSaveRegisters().size()) * kFramePointerSize) + sizeof(uint32_t); + size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) + + CalleeSaveRegisters().size() * kFramePointerSize + sizeof(uint32_t); // References plus 2 words for HandleScope header - size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount()); + size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount()); // Plus return value spill area size return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment); } diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h index 2a6e7d96cd..efc0b42db4 100644 --- a/compiler/jni/quick/calling_convention.h +++ b/compiler/jni/quick/calling_convention.h @@ -319,7 +319,8 @@ class JniCallingConvention : public CallingConvention { // Position of handle scope and interior fields FrameOffset HandleScopeOffset() const { - return FrameOffset(this->displacement_.Int32Value() + frame_pointer_size_); // above Method* + return FrameOffset(this->displacement_.Int32Value() + sizeof(StackReference<mirror::ArtMethod>)); + // above Method reference } FrameOffset HandleScopeLinkOffset() const { diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc index 0402fe6eb8..f7a7be7304 100644 --- a/compiler/jni/quick/mips/calling_convention_mips.cc +++ b/compiler/jni/quick/mips/calling_convention_mips.cc @@ -147,9 +147,10 @@ ManagedRegister MipsJniCallingConvention::ReturnScratchRegister() const { size_t MipsJniCallingConvention::FrameSize() { // Method*, LR and callee save area size, local reference segment state - size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize; + size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) + + (2 + CalleeSaveRegisters().size()) * kFramePointerSize; // References plus 2 words for HandleScope header - size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount()); + size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount()); // Plus return value spill area size return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment); } diff --git 
a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc index 97b4cdf8ac..9bf7d0f071 100644 --- a/compiler/jni/quick/x86/calling_convention_x86.cc +++ b/compiler/jni/quick/x86/calling_convention_x86.cc @@ -124,9 +124,10 @@ uint32_t X86JniCallingConvention::CoreSpillMask() const { size_t X86JniCallingConvention::FrameSize() { // Method*, return address and callee save area size, local reference segment state - size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize; + size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) + + (2 + CalleeSaveRegisters().size()) * kFramePointerSize; // References plus 2 words for HandleScope header - size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount()); + size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount()); // Plus return value spill area size return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment); } diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc index 4871c879a8..5febed24fe 100644 --- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc +++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc @@ -96,7 +96,7 @@ ManagedRegister X86_64ManagedRuntimeCallingConvention::CurrentParamRegister() { FrameOffset X86_64ManagedRuntimeCallingConvention::CurrentParamStackOffset() { return FrameOffset(displacement_.Int32Value() + // displacement - kFramePointerSize + // Method* + sizeof(StackReference<mirror::ArtMethod>) + // Method ref (itr_slots_ * sizeof(uint32_t))); // offset into in args } @@ -139,9 +139,10 @@ uint32_t X86_64JniCallingConvention::CoreSpillMask() const { size_t X86_64JniCallingConvention::FrameSize() { // Method*, return address and callee save area size, local reference segment state - size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kFramePointerSize; + size_t frame_data_size = sizeof(StackReference<mirror::ArtMethod>) + + (2 + CalleeSaveRegisters().size()) * kFramePointerSize; // References plus link_ (pointer) and number_of_references_ (uint32_t) for HandleScope header - size_t handle_scope_size = HandleScope::GetAlignedHandleScopeSizeTarget(kFramePointerSize, ReferenceCount()); + size_t handle_scope_size = HandleScope::SizeOf(kFramePointerSize, ReferenceCount()); // Plus return value spill area size return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment); } diff --git a/compiler/llvm/gbc_expander.cc b/compiler/llvm/gbc_expander.cc index 25c9b20514..f8dca66de0 100644 --- a/compiler/llvm/gbc_expander.cc +++ b/compiler/llvm/gbc_expander.cc @@ -1868,6 +1868,10 @@ llvm::Value* GBCExpanderPass::EmitLoadStaticStorage(uint32_t dex_pc, phi->addIncoming(storage_object_addr, block_check_init); phi->addIncoming(loaded_storage_object_addr, block_after_load_static); + + // Ensure load of status and load of value don't re-order. 
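[Editor's sketch] The comment above and the barrier emitted just below address the usual double-checked initialization hazard: on a weakly ordered CPU the load of the class's initialized status and the later load of the static-storage value could be reordered, so a thread might observe "initialized" yet read stale contents. The kLoadLoad fence keeps the two loads ordered. A rough portable C++ analogue, assuming the initializing thread publishes with a release store; names are illustrative, not the runtime's real fields:

#include <atomic>

struct ClassState {
  std::atomic<bool> initialized{false};
  int static_value = 0;  // published before 'initialized' is set with release semantics
};

int ReadStatic(ClassState& k) {
  if (k.initialized.load(std::memory_order_acquire)) {  // acquire plays the LoadLoad role
    return k.static_value;  // guaranteed to see the value stored before the release
  }
  return -1;  // caller would fall back to the slow initialization path
}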
+ irb_.CreateMemoryBarrier(art::kLoadLoad); + return phi; } diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 6812f3c9df..49cf71b7eb 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -180,7 +180,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(80U, sizeof(OatHeader)); EXPECT_EQ(8U, sizeof(OatMethodOffsets)); EXPECT_EQ(24U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(80 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); + EXPECT_EQ(79 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); } TEST_F(OatTest, OatHeaderIsValid) { diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 53e7bbe402..7a336204b6 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -26,6 +26,18 @@ namespace art { +static void DumpBitVector(BitVector* vector, + std::ostream& buffer, + size_t count, + const char* prefix) { + buffer << prefix; + buffer << '('; + for (size_t i = 0; i < count; ++i) { + buffer << vector->IsBitSet(i); + } + buffer << ")\n"; +} + static void TestCode(const uint16_t* data, const char* expected) { ArenaPool pool; ArenaAllocator allocator(&pool); @@ -43,12 +55,13 @@ static void TestCode(const uint16_t* data, const char* expected) { for (HInsertionOrderIterator it(*graph); !it.Done(); it.Advance()) { HBasicBlock* block = it.Current(); buffer << "Block " << block->GetBlockId() << std::endl; + size_t ssa_values = liveness.GetNumberOfSsaValues(); BitVector* live_in = liveness.GetLiveInSet(*block); - live_in->Dump(buffer, " live in: "); + DumpBitVector(live_in, buffer, ssa_values, " live in: "); BitVector* live_out = liveness.GetLiveOutSet(*block); - live_out->Dump(buffer, " live out: "); + DumpBitVector(live_out, buffer, ssa_values, " live out: "); BitVector* kill = liveness.GetKillSet(*block); - kill->Dump(buffer, " kill: "); + DumpBitVector(kill, buffer, ssa_values, " kill: "); } ASSERT_STREQ(expected, buffer.str().c_str()); } diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 33084df94b..1284a97cd1 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -95,14 +95,26 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) { // All predecessors have already been visited because we are visiting in reverse post order. // We merge the values of all locals, creating phis if those values differ. for (size_t local = 0; local < current_locals_->Size(); local++) { + bool one_predecessor_has_no_value = false; bool is_different = false; HInstruction* value = ValueOfLocal(block->GetPredecessors().Get(0), local); - for (size_t i = 1; i < block->GetPredecessors().Size(); i++) { - if (ValueOfLocal(block->GetPredecessors().Get(i), local) != value) { + + for (size_t i = 0, e = block->GetPredecessors().Size(); i < e; ++i) { + HInstruction* current = ValueOfLocal(block->GetPredecessors().Get(i), local); + if (current == nullptr) { +// one_predecessor_has_no_value = true; +// break; + } else if (current != value) { is_different = true; - break; } } + + if (one_predecessor_has_no_value) { + // If one predecessor has no value for this local, we trust the verifier has + // successfully checked that there is a store dominating any read after this block. 
+ continue; + } + if (is_different) { HPhi* phi = new (GetGraph()->GetArena()) HPhi( GetGraph()->GetArena(), local, block->GetPredecessors().Size(), Primitive::kPrimVoid); diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index d10461980d..485ea279ad 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -459,4 +459,34 @@ TEST(SsaTest, DeadLocal) { TestCode(data, expected); } +TEST(SsaTest, LocalInIf) { + // Test that we do not create a phi in the join block when one predecessor + // does not update the local. + const char* expected = + "BasicBlock 0, succ: 1\n" + " 0: IntConstant 0 [3, 3]\n" + " 1: IntConstant 4\n" + " 2: Goto\n" + "BasicBlock 1, pred: 0, succ: 2, 5\n" + " 3: Equal(0, 0) [4]\n" + " 4: If(3)\n" + "BasicBlock 2, pred: 1, succ: 3\n" + " 5: Goto\n" + "BasicBlock 3, pred: 2, 5, succ: 4\n" + " 6: ReturnVoid\n" + "BasicBlock 4, pred: 3\n" + " 7: Exit\n" + // Synthesized block to avoid critical edge. + "BasicBlock 5, pred: 1, succ: 3\n" + " 8: Goto\n"; + + const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + Instruction::CONST_4 | 0 | 0, + Instruction::IF_EQ, 3, + Instruction::CONST_4 | 4 << 12 | 1 << 8, + Instruction::RETURN_VOID); + + TestCode(data, expected); +} + } // namespace art diff --git a/compiler/utils/arm64/assembler_arm64.cc b/compiler/utils/arm64/assembler_arm64.cc index 27188b2331..009b227209 100644 --- a/compiler/utils/arm64/assembler_arm64.cc +++ b/compiler/utils/arm64/assembler_arm64.cc @@ -530,7 +530,7 @@ void Arm64Assembler::Call(FrameOffset base, Offset offs, ManagedRegister m_scrat Arm64ManagedRegister scratch = m_scratch.AsArm64(); CHECK(scratch.IsCoreRegister()) << scratch; // Call *(*(SP + base) + offset) - LoadFromOffset(scratch.AsCoreRegister(), SP, base.Int32Value()); + LoadWFromOffset(kLoadWord, scratch.AsOverlappingCoreRegisterLow(), SP, base.Int32Value()); LoadFromOffset(scratch.AsCoreRegister(), scratch.AsCoreRegister(), offs.Int32Value()); ___ Blr(reg_x(scratch.AsCoreRegister())); } @@ -656,16 +656,17 @@ void Arm64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, // trashed by native code. ___ Mov(reg_x(ETR), reg_x(TR)); - // Increate frame to required size - must be at least space to push Method*. + // Increase frame to required size - must be at least space to push StackReference<Method>. CHECK_GT(frame_size, kCalleeSavedRegsSize * kFramePointerSize); size_t adjust = frame_size - (kCalleeSavedRegsSize * kFramePointerSize); IncreaseFrameSize(adjust); - // Write Method*. - StoreToOffset(X0, SP, 0); + // Write StackReference<Method>. + DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>)); + StoreWToOffset(StoreOperandType::kStoreWord, W0, SP, 0); // Write out entry spills - int32_t offset = frame_size + kFramePointerSize; + int32_t offset = frame_size + sizeof(StackReference<mirror::ArtMethod>); for (size_t i = 0; i < entry_spills.size(); ++i) { Arm64ManagedRegister reg = entry_spills.at(i).AsArm64(); if (reg.IsNoRegister()) { diff --git a/compiler/utils/array_ref.h b/compiler/utils/array_ref.h new file mode 100644 index 0000000000..2d70b7dd31 --- /dev/null +++ b/compiler/utils/array_ref.h @@ -0,0 +1,173 @@ +/* + * Copyright (C) 2014 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_ARRAY_REF_H_
+#define ART_COMPILER_UTILS_ARRAY_REF_H_
+
+#include <type_traits>
+#include <vector>
+
+#include "base/logging.h"
+
+namespace art {
+
+/**
+ * @brief A container that references an array.
+ *
+ * @details The template class ArrayRef provides a container that references
+ * an external array. This external array must remain alive while the ArrayRef
+ * object is in use. The external array may be a std::vector<>-backed storage
+ * or any other contiguous chunk of memory but that memory must remain valid,
+ * i.e. the std::vector<> must not be resized for example.
+ *
+ * Except for copy/assign and insert/erase/capacity functions, the interface
+ * is essentially the same as std::vector<>. Since we don't want to throw
+ * exceptions, at() is also excluded.
+ */
+template <typename T>
+class ArrayRef {
+ private:
+  struct tag { };
+
+ public:
+  typedef T value_type;
+  typedef T& reference;
+  typedef const T& const_reference;
+  typedef T* pointer;
+  typedef const T* const_pointer;
+  typedef T* iterator;
+  typedef const T* const_iterator;
+  typedef std::reverse_iterator<iterator> reverse_iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef ptrdiff_t difference_type;
+  typedef size_t size_type;
+
+  // Constructors.
+
+  constexpr ArrayRef()
+      : array_(nullptr), size_(0u) {
+  }
+
+  template <size_t size>
+  constexpr ArrayRef(T (&array)[size])
+      : array_(array), size_(size) {
+  }
+
+  template <typename U, size_t size>
+  constexpr ArrayRef(U (&array)[size],
+                     typename std::enable_if<std::is_same<T, const U>::value, tag>::type t = tag())
+      : array_(array), size_(size) {
+  }
+
+  constexpr ArrayRef(T* array, size_t size)
+      : array_(array), size_(size) {
+  }
+
+  template <typename U>
+  constexpr ArrayRef(U* array, size_t size,
+                     typename std::enable_if<std::is_same<T, const U>::value, tag>::type t = tag())
+      : array_(array), size_(size) {
+  }
+
+  explicit ArrayRef(std::vector<T>& v)
+      : array_(v.data()), size_(v.size()) {
+  }
+
+  template <typename U>
+  ArrayRef(const std::vector<U>& v,
+           typename std::enable_if<std::is_same<T, const U>::value, tag>::type t = tag())
+      : array_(v.data()), size_(v.size()) {
+  }
+
+  // Assignment operators.
+
+  ArrayRef& operator=(const ArrayRef& other) {
+    array_ = other.array_;
+    size_ = other.size_;
+    return *this;
+  }
+
+  template <typename U>
+  typename std::enable_if<std::is_same<T, const U>::value, ArrayRef>::type&
+  operator=(const ArrayRef<U>& other) {
+    return *this = ArrayRef(other);
+  }
+
+  // Destructor.
+  ~ArrayRef() = default;
+
+  // Iterators.
+ iterator begin() { return array_; } + const_iterator begin() const { return array_; } + const_iterator cbegin() const { return array_; } + iterator end() { return array_ + size_; } + const_iterator end() const { return array_ + size_; } + const_iterator cend() const { return array_ + size_; } + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } + const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } + const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } + + // Size. + size_type size() const { return size_; } + bool empty() const { return size() == 0u; } + + // Element access. NOTE: Not providing at(). + + reference operator[](size_type n) { + DCHECK_LT(n, size_); + return array_[n]; + } + + const_reference operator[](size_type n) const { + DCHECK_LT(n, size_); + return array_[n]; + } + + reference front() { + DCHECK_NE(size_, 0u); + return array_[0]; + } + + const_reference front() const { + DCHECK_NE(size_, 0u); + return array_[0]; + } + + reference back() { + DCHECK_NE(size_, 0u); + return array_[size_ - 1u]; + } + + const_reference back() const { + DCHECK_NE(size_, 0u); + return array_[size_ - 1u]; + } + + value_type* data() { return array_; } + const value_type* data() const { return array_; } + + private: + T* array_; + size_t size_; +}; + +} // namespace art + + +#endif // ART_COMPILER_UTILS_ARRAY_REF_H_ diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 0791c63f90..56c6536fe5 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1411,10 +1411,12 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, } // return address then method on stack addl(ESP, Immediate(-frame_size + (spill_regs.size() * kFramePointerSize) + - kFramePointerSize /*method*/ + kFramePointerSize /*return address*/)); + sizeof(StackReference<mirror::ArtMethod>) /*method*/ + + kFramePointerSize /*return address*/)); pushl(method_reg.AsX86().AsCpuRegister()); for (size_t i = 0; i < entry_spills.size(); ++i) { - movl(Address(ESP, frame_size + kFramePointerSize + (i * kFramePointerSize)), + movl(Address(ESP, frame_size + sizeof(StackReference<mirror::ArtMethod>) + + (i * kFramePointerSize)), entry_spills.at(i).AsX86().AsCpuRegister()); } } @@ -1422,7 +1424,8 @@ void X86Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg, void X86Assembler::RemoveFrame(size_t frame_size, const std::vector<ManagedRegister>& spill_regs) { CHECK_ALIGNED(frame_size, kStackAlignment); - addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) - kFramePointerSize)); + addl(ESP, Immediate(frame_size - (spill_regs.size() * kFramePointerSize) - + sizeof(StackReference<mirror::ArtMethod>))); for (size_t i = 0; i < spill_regs.size(); ++i) { popl(spill_regs.at(i).AsX86().AsCpuRegister()); } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index 0ede8755e3..a14551c3b7 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -59,7 +59,6 @@ void X86_64Assembler::call(Label* label) { EmitLabel(label, kSize); } - void X86_64Assembler::pushq(CpuRegister reg) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); 
  EmitOptionalRex32(reg);
@@ -1652,8 +1651,12 @@ void X86_64Assembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
   }
   // return address then method on stack
   addq(CpuRegister(RSP), Immediate(-frame_size + (spill_regs.size() * kFramePointerSize) +
-                                   kFramePointerSize /*method*/ + kFramePointerSize /*return address*/));
-  pushq(method_reg.AsX86_64().AsCpuRegister());
+                                   sizeof(StackReference<mirror::ArtMethod>) /*method*/ +
+                                   kFramePointerSize /*return address*/));
+
+  DCHECK_EQ(4U, sizeof(StackReference<mirror::ArtMethod>));
+  subq(CpuRegister(RSP), Immediate(4));
+  movl(Address(CpuRegister(RSP), 0), method_reg.AsX86_64().AsCpuRegister());

   for (size_t i = 0; i < entry_spills.size(); ++i) {
     ManagedRegisterSpill spill = entry_spills.at(i);
@@ -1732,7 +1735,7 @@ void X86_64Assembler::Store(FrameOffset offs, ManagedRegister msrc, size_t size)
 void X86_64Assembler::StoreRef(FrameOffset dest, ManagedRegister msrc) {
   X86_64ManagedRegister src = msrc.AsX86_64();
   CHECK(src.IsCpuRegister());
-  movq(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
+  movl(Address(CpuRegister(RSP), dest), src.AsCpuRegister());
 }

 void X86_64Assembler::StoreRawPtr(FrameOffset dest, ManagedRegister msrc) {
@@ -2070,7 +2073,7 @@ void X86_64Assembler::Call(ManagedRegister mbase, Offset offset, ManagedRegister
 void X86_64Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
   CpuRegister scratch = mscratch.AsX86_64().AsCpuRegister();
-  movq(scratch, Address(CpuRegister(RSP), base));
+  movl(scratch, Address(CpuRegister(RSP), base));
   call(Address(scratch, offset));
 }
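
The revised X86JniCallingConvention::FrameSize() and X86_64JniCallingConvention::FrameSize() above share the same arithmetic: the method slot shrinks to a 4-byte stack reference while the return address, the local reference segment state and the callee-save slots stay pointer-sized. The standalone sketch below only models that computation; StackReferenceArtMethod, JniFrameSize and the example parameters are illustrative stand-ins, not ART code.

#include <cstddef>
#include <cstdint>
#include <iostream>

namespace sketch {

// Stand-in for art::StackReference<mirror::ArtMethod>: a compressed 32-bit reference.
struct StackReferenceArtMethod { uint32_t reference; };

constexpr size_t RoundUp(size_t x, size_t alignment) {
  return (x + alignment - 1) & ~(alignment - 1);
}

size_t JniFrameSize(size_t frame_pointer_size,
                    size_t callee_save_count,
                    size_t handle_scope_size,
                    size_t return_value_spill_size,
                    size_t stack_alignment) {
  // Method slot is a 4-byte stack reference; return address and local reference
  // segment state remain pointer-sized, as do the callee-save slots.
  size_t frame_data_size = sizeof(StackReferenceArtMethod) +
      (2 + callee_save_count) * frame_pointer_size;
  // Plus HandleScope and return value spill area, rounded up to the stack alignment.
  return RoundUp(frame_data_size + handle_scope_size + return_value_spill_size,
                 stack_alignment);
}

}  // namespace sketch

int main() {
  // Example numbers only: 8-byte pointers, 5 callee saves, a 56-byte handle scope,
  // an 8-byte return value spill area and 16-byte stack alignment.
  std::cout << sketch::JniFrameSize(8, 5, 56, 8, 16) << std::endl;
  return 0;
}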
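The kLoadLoad barrier added in gbc_expander.cc keeps the load of the class-initialization status from being reordered with the later load of the static storage it guards. The sketch below expresses the same ordering constraint with plain C++11 atomics (acquire on the status load); it is an analogy for the constraint, not the LLVM IR the expander actually emits.

#include <atomic>
#include <cassert>
#include <thread>

namespace sketch {

int static_storage = 0;                 // stands in for the class's static storage
std::atomic<bool> initialized{false};   // stands in for the class initialization status

void Initializer() {
  static_storage = 42;                                 // initialize the storage
  initialized.store(true, std::memory_order_release);  // then publish the status
}

void Reader() {
  // Load of the status: acquire ordering prevents the subsequent load of the
  // value from being hoisted above it, which is what the load-load barrier
  // guarantees in the expanded code.
  if (initialized.load(std::memory_order_acquire)) {
    assert(static_storage == 42);
  }
}

}  // namespace sketch

int main() {
  std::thread t1(sketch::Initializer);
  std::thread t2(sketch::Reader);
  t1.join();
  t2.join();
  return 0;
}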
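The ssa_builder.cc hunk changes the merge rule: a phi is created only when every predecessor defines the local and at least two of those definitions differ; a missing definition in any predecessor suppresses the phi, relying on the verifier's guarantee that a dominating store precedes any later read. A minimal standalone sketch of that rule, with Instr and MergeLocal as hypothetical stand-ins for HInstruction and the loop in VisitBasicBlock:

#include <cassert>
#include <vector>

struct Instr { int id; };  // stand-in for HInstruction

enum class MergeResult { kNoValue, kSingleValue, kNeedsPhi };

MergeResult MergeLocal(const std::vector<const Instr*>& predecessor_values) {
  const Instr* value = predecessor_values.empty() ? nullptr : predecessor_values[0];
  bool one_predecessor_has_no_value = false;
  bool is_different = false;
  for (const Instr* current : predecessor_values) {
    if (current == nullptr) {
      one_predecessor_has_no_value = true;
      break;
    } else if (current != value) {
      is_different = true;
    }
  }
  if (one_predecessor_has_no_value) {
    // The verifier guarantees any later read is dominated by a store, so no phi is needed.
    return MergeResult::kNoValue;
  }
  return is_different ? MergeResult::kNeedsPhi : MergeResult::kSingleValue;
}

int main() {
  Instr a{1}, b{2};
  assert(MergeLocal({&a, &a}) == MergeResult::kSingleValue);   // all predecessors agree
  assert(MergeLocal({&a, &b}) == MergeResult::kNeedsPhi);      // definitions differ
  assert(MergeLocal({&a, nullptr}) == MergeResult::kNoValue);  // one branch never wrote the local
  return 0;
}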
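A possible usage sketch for the new ArrayRef<T> header added above; the "utils/array_ref.h" include path and the Sum helper are assumptions, and the snippet presumes it is compiled inside the ART tree where base/logging.h resolves.

#include <vector>

#include "utils/array_ref.h"

namespace art {

static int Sum(ArrayRef<const int> values) {
  int total = 0;
  for (int value : values) {  // begin()/end() make ArrayRef usable in range-based for loops
    total += value;
  }
  return total;
}

}  // namespace art

int main() {
  int raw[] = {1, 2, 3};
  std::vector<int> vec = {4, 5, 6};
  // Both calls reference the caller's storage; no elements are copied.
  int a = art::Sum(art::ArrayRef<const int>(raw));
  int b = art::Sum(art::ArrayRef<const int>(vec));
  return (a == 6 && b == 15) ? 0 : 1;
}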