67 files changed, 1408 insertions, 1256 deletions
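Before the per-file hunks, a minimal sketch of the offset arithmetic that the reworked MIPS relative patcher and its updated tests rely on: the difference between the target and the PC+0 anchor is split into a high and a low halfword, the extra lw offset is now added only for dex-cache-array patches (the new string/type patches use the plain difference), and on R6 the high half is pre-compensated for addiu's sign extension. The helper below is hypothetical — the names and the actual kDexCacheArrayLwOffset value are not taken from the tree — and only models the arithmetic visible in relative_patcher_mips.cc and the tests.

#include <cstdint>

// Hypothetical helper (illustrative names, not ART code) modeling the patch
// arithmetic in relative_patcher_mips.cc and its tests.
struct PcRelHalves {
  uint16_t high;  // Goes into lui (R2) or auipc (R6).
  uint16_t low;   // Goes into ori (R2) or addiu (R6).
};

PcRelHalves SplitPcRelativeDiff(uint32_t target_offset,
                                uint32_t anchor_offset,
                                bool is_dex_cache_array,
                                bool is_r6,
                                uint32_t dex_cache_array_lw_offset) {
  uint32_t diff = target_offset - anchor_offset;
  if (is_dex_cache_array) {
    // Only dex-cache-array patches add the extra lw offset.
    diff += dex_cache_array_lw_offset;
  }
  if (is_r6) {
    // addiu sign-extends its 16-bit immediate, so pre-compensate the high half.
    diff += (diff & 0x8000) << 1;
  }
  return PcRelHalves{static_cast<uint16_t>(diff >> 16),
                     static_cast<uint16_t>(diff & 0xFFFFu)};
}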
diff --git a/compiler/Android.mk b/compiler/Android.mk index e3f8a5cf7f..0b7bd9cee4 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -157,7 +157,6 @@ LIBART_COMPILER_CFLAGS := LIBART_COMPILER_ENUM_OPERATOR_OUT_HEADER_FILES := \ compiled_method.h \ - dex/compiler_enums.h \ dex/dex_to_dex_compiler.h \ driver/compiler_driver.h \ driver/compiler_options.h \ diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h deleted file mode 100644 index 8800e4b08f..0000000000 --- a/compiler/dex/compiler_enums.h +++ /dev/null @@ -1,677 +0,0 @@ -/* - * Copyright (C) 2012 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_DEX_COMPILER_ENUMS_H_ -#define ART_COMPILER_DEX_COMPILER_ENUMS_H_ - -#include "dex_instruction.h" - -namespace art { - -enum RegisterClass { - kInvalidRegClass, - kCoreReg, - kFPReg, - kRefReg, - kAnyReg, -}; -std::ostream& operator<<(std::ostream& os, const RegisterClass& rhs); - -enum BitsUsed { - kSize32Bits, - kSize64Bits, - kSize128Bits, - kSize256Bits, - kSize512Bits, - kSize1024Bits, -}; -std::ostream& operator<<(std::ostream& os, const BitsUsed& rhs); - -enum SpecialTargetRegister { - kSelf, // Thread pointer. - kSuspend, // Used to reduce suspend checks for some targets. - kLr, - kPc, - kSp, - kArg0, - kArg1, - kArg2, - kArg3, - kArg4, - kArg5, - kArg6, - kArg7, - kFArg0, - kFArg1, - kFArg2, - kFArg3, - kFArg4, - kFArg5, - kFArg6, - kFArg7, - kFArg8, - kFArg9, - kFArg10, - kFArg11, - kFArg12, - kFArg13, - kFArg14, - kFArg15, - kRet0, - kRet1, - kInvokeTgt, - kHiddenArg, - kHiddenFpArg, - kCount -}; -std::ostream& operator<<(std::ostream& os, const SpecialTargetRegister& code); - -enum RegLocationType { - kLocDalvikFrame = 0, // Normal Dalvik register - kLocPhysReg, - kLocCompilerTemp, - kLocInvalid -}; -std::ostream& operator<<(std::ostream& os, const RegLocationType& rhs); - -enum BBType { - kNullBlock, - kEntryBlock, - kDalvikByteCode, - kExitBlock, - kExceptionHandling, - kDead, -}; -std::ostream& operator<<(std::ostream& os, const BBType& code); - -// Shared pseudo opcodes - must be < 0. -enum LIRPseudoOpcode { - kPseudoPrologueBegin = -18, - kPseudoPrologueEnd = -17, - kPseudoEpilogueBegin = -16, - kPseudoEpilogueEnd = -15, - kPseudoExportedPC = -14, - kPseudoSafepointPC = -13, - kPseudoIntrinsicRetry = -12, - kPseudoSuspendTarget = -11, - kPseudoThrowTarget = -10, - kPseudoCaseLabel = -9, - kPseudoBarrier = -8, - kPseudoEntryBlock = -7, - kPseudoExitBlock = -6, - kPseudoTargetLabel = -5, - kPseudoDalvikByteCodeBoundary = -4, - kPseudoPseudoAlign4 = -3, - kPseudoEHBlockLabel = -2, - kPseudoNormalBlockLabel = -1, -}; -std::ostream& operator<<(std::ostream& os, const LIRPseudoOpcode& rhs); - -enum ExtendedMIROpcode { - kMirOpFirst = kNumPackedOpcodes, - kMirOpPhi = kMirOpFirst, - - // @brief Copy from one VR to another. - // @details - // vA: destination VR - // vB: source VR - kMirOpCopy, - - // @brief Used to do float comparison with less-than bias. 
- // @details Unlike cmpl-float, this does not store result of comparison in VR. - // vA: left-hand side VR for comparison. - // vB: right-hand side VR for comparison. - kMirOpFusedCmplFloat, - - // @brief Used to do float comparison with greater-than bias. - // @details Unlike cmpg-float, this does not store result of comparison in VR. - // vA: left-hand side VR for comparison. - // vB: right-hand side VR for comparison. - kMirOpFusedCmpgFloat, - - // @brief Used to do double comparison with less-than bias. - // @details Unlike cmpl-double, this does not store result of comparison in VR. - // vA: left-hand side wide VR for comparison. - // vB: right-hand side wide VR for comparison. - kMirOpFusedCmplDouble, - - // @brief Used to do double comparison with greater-than bias. - // @details Unlike cmpl-double, this does not store result of comparison in VR. - // vA: left-hand side wide VR for comparison. - // vB: right-hand side wide VR for comparison. - kMirOpFusedCmpgDouble, - - // @brief Used to do comparison of 64-bit long integers. - // @details Unlike cmp-long, this does not store result of comparison in VR. - // vA: left-hand side wide VR for comparison. - // vB: right-hand side wide VR for comparison. - kMirOpFusedCmpLong, - - // @brief This represents no-op. - kMirOpNop, - - // @brief Do a null check on the object register. - // @details The backends may implement this implicitly or explicitly. This MIR is guaranteed - // to have the correct offset as an exception thrower. - // vA: object register - kMirOpNullCheck, - - kMirOpRangeCheck, - kMirOpDivZeroCheck, - kMirOpCheck, - kMirOpSelect, - - // Vector opcodes: - // TypeSize is an encoded field giving the element type and the vector size. - // It is encoded as OpSize << 16 | (number of bits in vector) - // - // Destination and source are integers that will be interpreted by the - // backend that supports Vector operations. Backends are permitted to support only - // certain vector register sizes. - // - // At this point, only two operand instructions are supported. Three operand instructions - // could be supported by using a bit in TypeSize and arg[0] where needed. - - // @brief MIR to move constant data to a vector register - // vA: destination - // vB: number of bits in register - // args[0]~args[3]: up to 128 bits of data for initialization - kMirOpConstVector, - - // @brief MIR to move a vectorized register to another - // vA: destination - // vB: source - // vC: TypeSize - kMirOpMoveVector, - - // @brief Packed multiply of units in two vector registers: vB = vB .* vC using vA to know the type of the vector. - // vA: destination and source - // vB: source - // vC: TypeSize - kMirOpPackedMultiply, - - // @brief Packed addition of units in two vector registers: vB = vB .+ vC using vA to know the type of the vector. - // vA: destination and source - // vB: source - // vC: TypeSize - kMirOpPackedAddition, - - // @brief Packed subtraction of units in two vector registers: vB = vB .- vC using vA to know the type of the vector. - // vA: destination and source - // vB: source - // vC: TypeSize - kMirOpPackedSubtract, - - // @brief Packed shift left of units in two vector registers: vB = vB .<< vC using vA to know the type of the vector. - // vA: destination and source - // vB: amount to shift - // vC: TypeSize - kMirOpPackedShiftLeft, - - // @brief Packed signed shift right of units in two vector registers: vB = vB .>> vC using vA to know the type of the vector. 
- // vA: destination and source - // vB: amount to shift - // vC: TypeSize - kMirOpPackedSignedShiftRight, - - // @brief Packed unsigned shift right of units in two vector registers: vB = vB .>>> vC using vA to know the type of the vector. - // vA: destination and source - // vB: amount to shift - // vC: TypeSize - kMirOpPackedUnsignedShiftRight, - - // @brief Packed bitwise and of units in two vector registers: vB = vB .& vC using vA to know the type of the vector. - // vA: destination and source - // vB: source - // vC: TypeSize - kMirOpPackedAnd, - - // @brief Packed bitwise or of units in two vector registers: vB = vB .| vC using vA to know the type of the vector. - // vA: destination and source - // vB: source - // vC: TypeSize - kMirOpPackedOr, - - // @brief Packed bitwise xor of units in two vector registers: vB = vB .^ vC using vA to know the type of the vector. - // vA: destination and source - // vB: source - // vC: TypeSize - kMirOpPackedXor, - - // @brief Reduce a 128-bit packed element into a single VR by taking lower bits - // @details Instruction does a horizontal addition of the packed elements and then adds it to VR - // vA: destination and source VR (not vector register) - // vB: source (vector register) - // vC: TypeSize - kMirOpPackedAddReduce, - - // @brief Extract a packed element into a single VR. - // vA: destination VR (not vector register) - // vB: source (vector register) - // vC: TypeSize - // arg[0]: The index to use for extraction from vector register (which packed element) - kMirOpPackedReduce, - - // @brief Create a vector value, with all TypeSize values equal to vC - // vA: destination vector register - // vB: source VR (not vector register) - // vC: TypeSize - kMirOpPackedSet, - - // @brief Reserve a range of vector registers. - // vA: Start vector register to reserve. - // vB: Inclusive end vector register to reserve. - // @note: The backend may choose to map vector numbers used in vector opcodes. - // Reserved registers are removed from the list of backend temporary pool. - kMirOpReserveVectorRegisters, - - // @brief Free a range of reserved vector registers - // vA: Start vector register to unreserve. - // vB: Inclusive end vector register to unreserve. - // @note: All currently reserved vector registers are returned to the temporary pool. - kMirOpReturnVectorRegisters, - - // @brief Create a memory barrier. - // vA: a constant defined by enum MemBarrierKind. - kMirOpMemBarrier, - - // @brief Used to fill a vector register with array values. - // @details Just as with normal arrays, access on null object register must ensure NullPointerException - // and invalid index must ensure ArrayIndexOutOfBoundsException. Exception behavior must be the same - // as the aget it replaced and must happen at same index. Therefore, it is generally recommended that - // before using this MIR, it is proven that exception is guaranteed to not be thrown and marked with - // MIR_IGNORE_NULL_CHECK and MIR_IGNORE_RANGE_CHECK. - // vA: destination vector register - // vB: array register - // vC: index register - // arg[0]: TypeSize (most other vector opcodes have this in vC) - kMirOpPackedArrayGet, - - // @brief Used to store a vector register into array. - // @details Just as with normal arrays, access on null object register must ensure NullPointerException - // and invalid index must ensure ArrayIndexOutOfBoundsException. Exception behavior must be the same - // as the aget it replaced and must happen at same index. 
Therefore, it is generally recommended that - // before using this MIR, it is proven that exception is guaranteed to not be thrown and marked with - // MIR_IGNORE_NULL_CHECK and MIR_IGNORE_RANGE_CHECK. - // vA: source vector register - // vB: array register - // vC: index register - // arg[0]: TypeSize (most other vector opcodes have this in vC) - kMirOpPackedArrayPut, - - // @brief Multiply-add integer. - // vA: destination - // vB: multiplicand - // vC: multiplier - // arg[0]: addend - kMirOpMaddInt, - - // @brief Multiply-subtract integer. - // vA: destination - // vB: multiplicand - // vC: multiplier - // arg[0]: minuend - kMirOpMsubInt, - - // @brief Multiply-add long. - // vA: destination - // vB: multiplicand - // vC: multiplier - // arg[0]: addend - kMirOpMaddLong, - - // @brief Multiply-subtract long. - // vA: destination - // vB: multiplicand - // vC: multiplier - // arg[0]: minuend - kMirOpMsubLong, - - kMirOpLast, -}; - -enum MIROptimizationFlagPositions { - kMIRIgnoreNullCheck = 0, - kMIRIgnoreRangeCheck, - kMIRIgnoreCheckCast, - kMIRStoreNonNullValue, // Storing non-null value, always mark GC card. - kMIRClassIsInitialized, - kMIRClassIsInDexCache, - kMirIgnoreDivZeroCheck, - kMIRInlined, // Invoke is inlined (ie dead). - kMIRInlinedPred, // Invoke is inlined via prediction. - kMIRCallee, // Instruction is inlined from callee. - kMIRIgnoreSuspendCheck, - kMIRDup, - kMIRMark, // Temporary node mark can be used by - // opt passes for their private needs. - kMIRStoreNonTemporal, - kMIRLastMIRFlag, -}; - -// For successor_block_list. -enum BlockListType { - kNotUsed = 0, - kCatch, - kPackedSwitch, - kSparseSwitch, -}; -std::ostream& operator<<(std::ostream& os, const BlockListType& rhs); - -enum AssemblerStatus { - kSuccess, - kRetryAll, -}; -std::ostream& operator<<(std::ostream& os, const AssemblerStatus& rhs); - -enum OpSize { - kWord, // Natural word size of target (32/64). - k32, - k64, - kReference, // Object reference; compressed on 64-bit targets. - kSingle, - kDouble, - kUnsignedHalf, - kSignedHalf, - kUnsignedByte, - kSignedByte, -}; -std::ostream& operator<<(std::ostream& os, const OpSize& kind); - -enum OpKind { - kOpMov, - kOpCmov, - kOpMvn, - kOpCmp, - kOpLsl, - kOpLsr, - kOpAsr, - kOpRor, - kOpNot, - kOpAnd, - kOpOr, - kOpXor, - kOpNeg, - kOpAdd, - kOpAdc, - kOpSub, - kOpSbc, - kOpRsub, - kOpMul, - kOpDiv, - kOpRem, - kOpBic, - kOpCmn, - kOpTst, - kOpRev, - kOpRevsh, - kOpBkpt, - kOpBlx, - kOpPush, - kOpPop, - kOp2Char, - kOp2Short, - kOp2Byte, - kOpCondBr, - kOpUncondBr, - kOpBx, - kOpInvalid, -}; -std::ostream& operator<<(std::ostream& os, const OpKind& rhs); - -enum MoveType { - kMov8GP, // Move 8-bit general purpose register. - kMov16GP, // Move 16-bit general purpose register. - kMov32GP, // Move 32-bit general purpose register. - kMov64GP, // Move 64-bit general purpose register. - kMov32FP, // Move 32-bit FP register. - kMov64FP, // Move 64-bit FP register. - kMovLo64FP, // Move low 32-bits of 64-bit FP register. - kMovHi64FP, // Move high 32-bits of 64-bit FP register. - kMovU128FP, // Move 128-bit FP register to/from possibly unaligned region. - kMov128FP = kMovU128FP, - kMovA128FP, // Move 128-bit FP register to/from region surely aligned to 16-bytes. - kMovLo128FP, // Move low 64-bits of 128-bit FP register. - kMovHi128FP, // Move high 64-bits of 128-bit FP register. 
-}; -std::ostream& operator<<(std::ostream& os, const MoveType& kind); - -enum ConditionCode { - kCondEq, // equal - kCondNe, // not equal - kCondCs, // carry set - kCondCc, // carry clear - kCondUlt, // unsigned less than - kCondUge, // unsigned greater than or same - kCondMi, // minus - kCondPl, // plus, positive or zero - kCondVs, // overflow - kCondVc, // no overflow - kCondHi, // unsigned greater than - kCondLs, // unsigned lower or same - kCondGe, // signed greater than or equal - kCondLt, // signed less than - kCondGt, // signed greater than - kCondLe, // signed less than or equal - kCondAl, // always - kCondNv, // never -}; -std::ostream& operator<<(std::ostream& os, const ConditionCode& kind); - -// Target specific condition encodings -enum ArmConditionCode { - kArmCondEq = 0x0, // 0000 - kArmCondNe = 0x1, // 0001 - kArmCondCs = 0x2, // 0010 - kArmCondCc = 0x3, // 0011 - kArmCondMi = 0x4, // 0100 - kArmCondPl = 0x5, // 0101 - kArmCondVs = 0x6, // 0110 - kArmCondVc = 0x7, // 0111 - kArmCondHi = 0x8, // 1000 - kArmCondLs = 0x9, // 1001 - kArmCondGe = 0xa, // 1010 - kArmCondLt = 0xb, // 1011 - kArmCondGt = 0xc, // 1100 - kArmCondLe = 0xd, // 1101 - kArmCondAl = 0xe, // 1110 - kArmCondNv = 0xf, // 1111 -}; -std::ostream& operator<<(std::ostream& os, const ArmConditionCode& kind); - -enum X86ConditionCode { - kX86CondO = 0x0, // overflow - kX86CondNo = 0x1, // not overflow - - kX86CondB = 0x2, // below - kX86CondNae = kX86CondB, // not-above-equal - kX86CondC = kX86CondB, // carry - - kX86CondNb = 0x3, // not-below - kX86CondAe = kX86CondNb, // above-equal - kX86CondNc = kX86CondNb, // not-carry - - kX86CondZ = 0x4, // zero - kX86CondEq = kX86CondZ, // equal - - kX86CondNz = 0x5, // not-zero - kX86CondNe = kX86CondNz, // not-equal - - kX86CondBe = 0x6, // below-equal - kX86CondNa = kX86CondBe, // not-above - - kX86CondNbe = 0x7, // not-below-equal - kX86CondA = kX86CondNbe, // above - - kX86CondS = 0x8, // sign - kX86CondNs = 0x9, // not-sign - - kX86CondP = 0xa, // 8-bit parity even - kX86CondPE = kX86CondP, - - kX86CondNp = 0xb, // 8-bit parity odd - kX86CondPo = kX86CondNp, - - kX86CondL = 0xc, // less-than - kX86CondNge = kX86CondL, // not-greater-equal - - kX86CondNl = 0xd, // not-less-than - kX86CondGe = kX86CondNl, // not-greater-equal - - kX86CondLe = 0xe, // less-than-equal - kX86CondNg = kX86CondLe, // not-greater - - kX86CondNle = 0xf, // not-less-than - kX86CondG = kX86CondNle, // greater -}; -std::ostream& operator<<(std::ostream& os, const X86ConditionCode& kind); - -enum DividePattern { - DivideNone, - Divide3, - Divide5, - Divide7, -}; -std::ostream& operator<<(std::ostream& os, const DividePattern& pattern); - -/** - * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers"). - * @details We define the combined barrier types that are actually required - * by the Java Memory Model, rather than using exactly the terminology from - * the JSR-133 cookbook. These should, in many cases, be replaced by acquire/release - * primitives. Note that the JSR-133 cookbook generally does not deal with - * store atomicity issues, and the recipes there are not always entirely sufficient. - * The current recipe is as follows: - * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store. - * -# Use AnyAny barrier after volatile store. (StoreLoad is as expensive.) - * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load. 
- * -# Use StoreStore barrier after all stores but before return from any constructor whose - * class has final fields. - * -# Use NTStoreStore to order non-temporal stores with respect to all later - * store-to-memory instructions. Only generated together with non-temporal stores. - */ -enum MemBarrierKind { - kAnyStore, - kLoadAny, - kStoreStore, - kAnyAny, - kNTStoreStore, - kLastBarrierKind = kNTStoreStore -}; -std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind); - -enum OpFeatureFlags { - kIsBranch = 0, - kNoOperand, - kIsUnaryOp, - kIsBinaryOp, - kIsTertiaryOp, - kIsQuadOp, - kIsQuinOp, - kIsSextupleOp, - kIsIT, - kIsMoveOp, - kMemLoad, - kMemStore, - kMemVolatile, - kMemScaledx0, - kMemScaledx2, - kMemScaledx4, - kPCRelFixup, // x86 FIXME: add NEEDS_FIXUP to instruction attributes. - kRegDef0, - kRegDef1, - kRegDef2, - kRegDefA, - kRegDefD, - kRegDefFPCSList0, - kRegDefFPCSList2, - kRegDefList0, - kRegDefList1, - kRegDefList2, - kRegDefLR, - kRegDefSP, - kRegUse0, - kRegUse1, - kRegUse2, - kRegUse3, - kRegUse4, - kRegUseA, - kRegUseC, - kRegUseD, - kRegUseB, - kRegUseFPCSList0, - kRegUseFPCSList2, - kRegUseList0, - kRegUseList1, - kRegUseLR, - kRegUsePC, - kRegUseSP, - kSetsCCodes, - kUsesCCodes, - kUseFpStack, - kUseHi, - kUseLo, - kDefHi, - kDefLo -}; -std::ostream& operator<<(std::ostream& os, const OpFeatureFlags& rhs); - -enum SelectInstructionKind { - kSelectNone, - kSelectConst, - kSelectMove, - kSelectGoto -}; -std::ostream& operator<<(std::ostream& os, const SelectInstructionKind& kind); - -// LIR fixup kinds for Arm and X86. -enum FixupKind { - kFixupNone, - kFixupLabel, // For labels we just adjust the offset. - kFixupLoad, // Mostly for immediates. - kFixupVLoad, // FP load which *may* be pc-relative. - kFixupCBxZ, // Cbz, Cbnz. - kFixupTBxZ, // Tbz, Tbnz. - kFixupCondBranch, // Conditional branch - kFixupT1Branch, // Thumb1 Unconditional branch - kFixupT2Branch, // Thumb2 Unconditional branch - kFixupBlx1, // Blx1 (start of Blx1/Blx2 pair). - kFixupBl1, // Bl1 (start of Bl1/Bl2 pair). - kFixupAdr, // Adr. - kFixupMovImmLST, // kThumb2MovImm16LST. - kFixupMovImmHST, // kThumb2MovImm16HST. - kFixupAlign4, // Align to 4-byte boundary. - kFixupA53Erratum835769, // Cortex A53 Erratum 835769. - kFixupSwitchTable, // X86_64 packed switch table. -}; -std::ostream& operator<<(std::ostream& os, const FixupKind& kind); - -enum VolatileKind { - kNotVolatile, // Load/Store is not volatile - kVolatile // Load/Store is volatile -}; -std::ostream& operator<<(std::ostream& os, const VolatileKind& kind); - -enum WideKind { - kNotWide, // Non-wide view - kWide, // Wide view - kRef // Ref width -}; -std::ostream& operator<<(std::ostream& os, const WideKind& kind); - -} // namespace art - -#endif // ART_COMPILER_DEX_COMPILER_ENUMS_H_ diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h index fbe403f596..50dc032115 100644 --- a/compiler/dex/quick/dex_file_method_inliner.h +++ b/compiler/dex/quick/dex_file_method_inliner.h @@ -21,7 +21,6 @@ #include "base/mutex.h" #include "base/macros.h" #include "safe_map.h" -#include "dex/compiler_enums.h" #include "dex_file.h" #include "quick/inline_method_analyser.h" @@ -31,6 +30,13 @@ namespace verifier { class MethodVerifier; } // namespace verifier +enum OpSize { + k32, + k64, + kSignedHalf, + kSignedByte, +}; + /** * Handles inlining of methods from a particular DexFile. 
* diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc index 7c0423b635..c09950cd5d 100644 --- a/compiler/linker/mips/relative_patcher_mips.cc +++ b/compiler/linker/mips/relative_patcher_mips.cc @@ -49,6 +49,7 @@ void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, uint32_t target_offset) { uint32_t anchor_literal_offset = patch.PcInsnOffset(); uint32_t literal_offset = patch.LiteralOffset(); + bool dex_cache_array = (patch.GetType() == LinkerPatch::Type::kDexCacheArray); // Basic sanity checks. if (is_r6) { @@ -68,12 +69,16 @@ void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, DCHECK_GE(code->size(), 16u); DCHECK_LE(literal_offset, code->size() - 12u); DCHECK_GE(literal_offset, 4u); - DCHECK_EQ(literal_offset + 4u, anchor_literal_offset); - // NAL - DCHECK_EQ((*code)[literal_offset - 4], 0x00); - DCHECK_EQ((*code)[literal_offset - 3], 0x00); - DCHECK_EQ((*code)[literal_offset - 2], 0x10); - DCHECK_EQ((*code)[literal_offset - 1], 0x04); + // The NAL instruction may not precede immediately as the PC+0 value may + // come from HMipsComputeBaseMethodAddress. + if (dex_cache_array) { + DCHECK_EQ(literal_offset + 4u, anchor_literal_offset); + // NAL + DCHECK_EQ((*code)[literal_offset - 4], 0x00); + DCHECK_EQ((*code)[literal_offset - 3], 0x00); + DCHECK_EQ((*code)[literal_offset - 2], 0x10); + DCHECK_EQ((*code)[literal_offset - 1], 0x04); + } // LUI reg, offset_high DCHECK_EQ((*code)[literal_offset + 0], 0x34); DCHECK_EQ((*code)[literal_offset + 1], 0x12); @@ -83,16 +88,22 @@ void MipsRelativePatcher::PatchPcRelativeReference(std::vector<uint8_t>* code, DCHECK_EQ((*code)[literal_offset + 4], 0x78); DCHECK_EQ((*code)[literal_offset + 5], 0x56); DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x34); - // ADDU reg, reg, RA + // ADDU reg, reg, reg2 DCHECK_EQ((*code)[literal_offset + 8], 0x21); DCHECK_EQ(((*code)[literal_offset + 9] & 0x07), 0x00); - DCHECK_EQ(((*code)[literal_offset + 10] & 0x1F), 0x1F); + if (dex_cache_array) { + // reg2 is either RA or from HMipsComputeBaseMethodAddress. + DCHECK_EQ(((*code)[literal_offset + 10] & 0x1F), 0x1F); + } DCHECK_EQ(((*code)[literal_offset + 11] & 0xFC), 0x00); } // Apply patch. uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset; - uint32_t diff = target_offset - anchor_offset + kDexCacheArrayLwOffset; + uint32_t diff = target_offset - anchor_offset; + if (dex_cache_array) { + diff += kDexCacheArrayLwOffset; + } if (is_r6) { diff += (diff & 0x8000) << 1; // Account for sign extension in ADDIU. 
} diff --git a/compiler/linker/mips/relative_patcher_mips32r6_test.cc b/compiler/linker/mips/relative_patcher_mips32r6_test.cc index 0f1dcbcbf1..a16aaca545 100644 --- a/compiler/linker/mips/relative_patcher_mips32r6_test.cc +++ b/compiler/linker/mips/relative_patcher_mips32r6_test.cc @@ -29,40 +29,78 @@ class Mips32r6RelativePatcherTest : public RelativePatcherTest { Mips32r6RelativePatcherTest() : RelativePatcherTest(kMips, "mips32r6") {} protected: + static const uint8_t UnpatchedPcRelativeRawCode[]; + static const uint32_t LiteralOffset; + static const uint32_t AnchorOffset; + static const ArrayRef<const uint8_t> UnpatchedPcRelativeCode; + uint32_t GetMethodOffset(uint32_t method_idx) { auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); CHECK(result.first); return result.second; } + + void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); + void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset); + void TestStringReference(uint32_t string_offset); }; -TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) { - dex_cache_arrays_begin_ = 0x12345678; - constexpr size_t kElementOffset = 0x1234; - static const uint8_t raw_code[] = { - 0x34, 0x12, 0x5E, 0xEE, // auipc s2, high(diff); placeholder = 0x1234 - 0x78, 0x56, 0x52, 0x26, // addiu s2, s2, low(diff); placeholder = 0x5678 - }; - constexpr uint32_t literal_offset = 0; // At auipc (where patching starts). - constexpr uint32_t anchor_offset = literal_offset; // At auipc (where PC+0 points). - ArrayRef<const uint8_t> code(raw_code); - LinkerPatch patches[] = { - LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset), - }; - AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); +const uint8_t Mips32r6RelativePatcherTest::UnpatchedPcRelativeRawCode[] = { + 0x34, 0x12, 0x5E, 0xEE, // auipc s2, high(diff); placeholder = 0x1234 + 0x78, 0x56, 0x52, 0x26, // addiu s2, s2, low(diff); placeholder = 0x5678 +}; +const uint32_t Mips32r6RelativePatcherTest::LiteralOffset = 0; // At auipc (where patching starts). +const uint32_t Mips32r6RelativePatcherTest::AnchorOffset = 0; // At auipc (where PC+0 points). +const ArrayRef<const uint8_t> Mips32r6RelativePatcherTest::UnpatchedPcRelativeCode( + UnpatchedPcRelativeRawCode); + +void Mips32r6RelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, + uint32_t target_offset) { + AddCompiledMethod(MethodRef(1u), UnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); Link(); auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); ASSERT_TRUE(result.first); - uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) + - kDexCacheArrayLwOffset; + + uint32_t diff = target_offset - (result.second + AnchorOffset); + if (patches[0].GetType() == LinkerPatch::Type::kDexCacheArray) { + diff += kDexCacheArrayLwOffset; + } diff += (diff & 0x8000) << 1; // Account for sign extension in addiu. 
- static const uint8_t expected_code[] = { + + const uint8_t expected_code[] = { static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x5E, 0xEE, static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x26, }; EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); } +void Mips32r6RelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin, + uint32_t element_offset) { + dex_cache_arrays_begin_ = dex_cache_arrays_begin; + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(LiteralOffset, nullptr, AnchorOffset, element_offset) + }; + CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), + dex_cache_arrays_begin_ + element_offset); +} + +void Mips32r6RelativePatcherTest::TestStringReference(uint32_t string_offset) { + constexpr uint32_t kStringIndex = 1u; + string_index_to_offset_map_.Put(kStringIndex, string_offset); + LinkerPatch patches[] = { + LinkerPatch::RelativeStringPatch(LiteralOffset, nullptr, AnchorOffset, kStringIndex) + }; + CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset); +} + +TEST_F(Mips32r6RelativePatcherTest, DexCacheReference) { + TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234); +} + +TEST_F(Mips32r6RelativePatcherTest, StringReference) { + TestStringReference(/* string_offset*/ 0x87651234); +} + } // namespace linker } // namespace art diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc index 8391b5352a..335ce2e476 100644 --- a/compiler/linker/mips/relative_patcher_mips_test.cc +++ b/compiler/linker/mips/relative_patcher_mips_test.cc @@ -29,36 +29,47 @@ class MipsRelativePatcherTest : public RelativePatcherTest { MipsRelativePatcherTest() : RelativePatcherTest(kMips, "mips32r2") {} protected: + static const uint8_t UnpatchedPcRelativeRawCode[]; + static const uint32_t LiteralOffset; + static const uint32_t AnchorOffset; + static const ArrayRef<const uint8_t> UnpatchedPcRelativeCode; + uint32_t GetMethodOffset(uint32_t method_idx) { auto result = method_offset_map_.FindMethodOffset(MethodRef(method_idx)); CHECK(result.first); return result.second; } + + void CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, uint32_t target_offset); + void TestDexCacheReference(uint32_t dex_cache_arrays_begin, uint32_t element_offset); + void TestStringReference(uint32_t string_offset); }; -TEST_F(MipsRelativePatcherTest, DexCacheReference) { - dex_cache_arrays_begin_ = 0x12345678; - constexpr size_t kElementOffset = 0x1234; - static const uint8_t raw_code[] = { - 0x00, 0x00, 0x10, 0x04, // nal - 0x34, 0x12, 0x12, 0x3C, // lui s2, high(diff); placeholder = 0x1234 - 0x78, 0x56, 0x52, 0x36, // ori s2, s2, low(diff); placeholder = 0x5678 - 0x21, 0x90, 0x5F, 0x02, // addu s2, s2, ra - }; - constexpr uint32_t literal_offset = 4; // At lui (where patching starts). - constexpr uint32_t anchor_offset = 8; // At ori (where PC+0 points). 
- ArrayRef<const uint8_t> code(raw_code); - LinkerPatch patches[] = { - LinkerPatch::DexCacheArrayPatch(literal_offset, nullptr, anchor_offset, kElementOffset), - }; - AddCompiledMethod(MethodRef(1u), code, ArrayRef<const LinkerPatch>(patches)); +const uint8_t MipsRelativePatcherTest::UnpatchedPcRelativeRawCode[] = { + 0x00, 0x00, 0x10, 0x04, // nal + 0x34, 0x12, 0x12, 0x3C, // lui s2, high(diff); placeholder = 0x1234 + 0x78, 0x56, 0x52, 0x36, // ori s2, s2, low(diff); placeholder = 0x5678 + 0x21, 0x90, 0x5F, 0x02, // addu s2, s2, ra +}; +const uint32_t MipsRelativePatcherTest::LiteralOffset = 4; // At lui (where patching starts). +const uint32_t MipsRelativePatcherTest::AnchorOffset = 8; // At ori (where PC+0 points). +const ArrayRef<const uint8_t> MipsRelativePatcherTest::UnpatchedPcRelativeCode( + UnpatchedPcRelativeRawCode); + +void MipsRelativePatcherTest::CheckPcRelativePatch(const ArrayRef<const LinkerPatch>& patches, + uint32_t target_offset) { + AddCompiledMethod(MethodRef(1u), UnpatchedPcRelativeCode, ArrayRef<const LinkerPatch>(patches)); Link(); auto result = method_offset_map_.FindMethodOffset(MethodRef(1u)); ASSERT_TRUE(result.first); - uint32_t diff = dex_cache_arrays_begin_ + kElementOffset - (result.second + anchor_offset) + - kDexCacheArrayLwOffset; - static const uint8_t expected_code[] = { + + uint32_t diff = target_offset - (result.second + AnchorOffset); + if (patches[0].GetType() == LinkerPatch::Type::kDexCacheArray) { + diff += kDexCacheArrayLwOffset; + } + + const uint8_t expected_code[] = { 0x00, 0x00, 0x10, 0x04, static_cast<uint8_t>(diff >> 16), static_cast<uint8_t>(diff >> 24), 0x12, 0x3C, static_cast<uint8_t>(diff), static_cast<uint8_t>(diff >> 8), 0x52, 0x36, @@ -67,5 +78,32 @@ TEST_F(MipsRelativePatcherTest, DexCacheReference) { EXPECT_TRUE(CheckLinkedMethod(MethodRef(1u), ArrayRef<const uint8_t>(expected_code))); } +void MipsRelativePatcherTest::TestDexCacheReference(uint32_t dex_cache_arrays_begin, + uint32_t element_offset) { + dex_cache_arrays_begin_ = dex_cache_arrays_begin; + LinkerPatch patches[] = { + LinkerPatch::DexCacheArrayPatch(LiteralOffset, nullptr, AnchorOffset, element_offset) + }; + CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), + dex_cache_arrays_begin_ + element_offset); +} + +void MipsRelativePatcherTest::TestStringReference(uint32_t string_offset) { + constexpr uint32_t kStringIndex = 1u; + string_index_to_offset_map_.Put(kStringIndex, string_offset); + LinkerPatch patches[] = { + LinkerPatch::RelativeStringPatch(LiteralOffset, nullptr, AnchorOffset, kStringIndex) + }; + CheckPcRelativePatch(ArrayRef<const LinkerPatch>(patches), string_offset); +} + +TEST_F(MipsRelativePatcherTest, DexCacheReference) { + TestDexCacheReference(/* dex_cache_arrays_begin */ 0x12345678, /* element_offset */ 0x1234); +} + +TEST_F(MipsRelativePatcherTest, StringReference) { + TestStringReference(/* string_offset*/ 0x87651234); +} + } // namespace linker } // namespace art diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc index 18ebfeb7f4..0762eec830 100644 --- a/compiler/oat_test.cc +++ b/compiler/oat_test.cc @@ -444,7 +444,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(72U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(20U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(164 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); + EXPECT_EQ(162 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints)); } TEST_F(OatTest, OatHeaderIsValid) { diff --git 
a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 2042adef1c..62dd1cc818 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -359,7 +359,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // Return the entry point offset for ReadBarrierMarkRegX, where X is `reg`. template <size_t pointer_size> static int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg) { - DCHECK_LT(reg, 32u); + // The entry point list defines 30 ReadBarrierMarkRegX entry points. + DCHECK_LT(reg, 30u); // The ReadBarrierMarkRegX entry points are ordered by increasing // register number in Thread::tls_Ptr_.quick_entrypoints. return QUICK_ENTRYPOINT_OFFSET(pointer_size, pReadBarrierMarkReg00).Int32Value() diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 05cb8d1940..a07a2331fc 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -18,7 +18,6 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_H_ #include "code_generator.h" -#include "dex/compiler_enums.h" #include "driver/compiler_options.h" #include "nodes.h" #include "string_reference.h" diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 88e8cead32..03f5a3364c 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -20,7 +20,6 @@ #include "arch/arm64/quick_method_frame_info_arm64.h" #include "code_generator.h" #include "common_arm64.h" -#include "dex/compiler_enums.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 39248aa430..334d30d90e 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -482,11 +482,22 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, move_resolver_(graph->GetArena(), this), assembler_(graph->GetArena(), &isa_features), isa_features_(isa_features), + uint32_literals_(std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), method_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), call_patches_(MethodReferenceComparator(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(StringReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(TypeReferenceValueComparator(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_address_patches_(std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + clobbered_ra_(false) { // Save RA (containing the return address) to mimic Quick. 
AddAllocatedRegister(Location::RegisterLocation(RA)); } @@ -688,6 +699,16 @@ void CodeGeneratorMIPS::ComputeSpillMask() { if ((fpu_spill_mask_ != 0) && (POPCOUNT(core_spill_mask_) % 2 != 0)) { core_spill_mask_ |= (1 << ZERO); } + // If RA is clobbered by PC-relative operations on R2 and it's the only spilled register + // (this can happen in leaf methods), artificially spill the ZERO register in order to + // force explicit saving and restoring of RA. RA isn't saved/restored when it's the only + // spilled register. + // TODO: Can this be improved? It causes creation of a stack frame (while RA might be + // saved in an unused temporary register) and saving of RA and the current method pointer + // in the frame. + if (clobbered_ra_ && core_spill_mask_ == (1u << RA) && fpu_spill_mask_ == 0) { + core_spill_mask_ |= (1 << ZERO); + } } static dwarf::Reg DWARFReg(Register reg) { @@ -962,7 +983,12 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch size_t size = method_patches_.size() + call_patches_.size() + - pc_relative_dex_cache_patches_.size(); + pc_relative_dex_cache_patches_.size() + + pc_relative_string_patches_.size() + + pc_relative_type_patches_.size() + + boot_image_string_patches_.size() + + boot_image_type_patches_.size() + + boot_image_address_patches_.size(); linker_patches->reserve(size); for (const auto& entry : method_patches_) { const MethodReference& target_method = entry.first; @@ -994,6 +1020,71 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch pc_rel_offset, base_element_offset)); } + for (const PcRelativePatchInfo& info : pc_relative_string_patches_) { + const DexFile& dex_file = info.target_dex_file; + size_t string_index = info.offset_or_index; + DCHECK(info.high_label.IsBound()); + uint32_t high_offset = __ GetLabelLocation(&info.high_label); + // On R2 we use HMipsComputeBaseMethodAddress and patch relative to + // the assembler's base label used for PC-relative literals. + uint32_t pc_rel_offset = info.pc_rel_label.IsBound() + ? __ GetLabelLocation(&info.pc_rel_label) + : __ GetPcRelBaseLabelLocation(); + linker_patches->push_back(LinkerPatch::RelativeStringPatch(high_offset, + &dex_file, + pc_rel_offset, + string_index)); + } + for (const PcRelativePatchInfo& info : pc_relative_type_patches_) { + const DexFile& dex_file = info.target_dex_file; + size_t type_index = info.offset_or_index; + DCHECK(info.high_label.IsBound()); + uint32_t high_offset = __ GetLabelLocation(&info.high_label); + // On R2 we use HMipsComputeBaseMethodAddress and patch relative to + // the assembler's base label used for PC-relative literals. + uint32_t pc_rel_offset = info.pc_rel_label.IsBound() + ? 
__ GetLabelLocation(&info.pc_rel_label) + : __ GetPcRelBaseLabelLocation(); + linker_patches->push_back(LinkerPatch::RelativeTypePatch(high_offset, + &dex_file, + pc_rel_offset, + type_index)); + } + for (const auto& entry : boot_image_string_patches_) { + const StringReference& target_string = entry.first; + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::StringPatch(literal_offset, + target_string.dex_file, + target_string.string_index)); + } + for (const auto& entry : boot_image_type_patches_) { + const TypeReference& target_type = entry.first; + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, + target_type.dex_file, + target_type.type_index)); + } + for (const auto& entry : boot_image_address_patches_) { + DCHECK(GetCompilerOptions().GetIncludePatchInformation()); + Literal* literal = entry.second; + DCHECK(literal->GetLabel()->IsBound()); + uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel()); + linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset)); + } +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch( + const DexFile& dex_file, uint32_t string_index) { + return NewPcRelativePatch(dex_file, string_index, &pc_relative_string_patches_); +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch( + const DexFile& dex_file, uint32_t type_index) { + return NewPcRelativePatch(dex_file, type_index, &pc_relative_type_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch( @@ -1007,6 +1098,12 @@ CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch( return &patches->back(); } +Literal* CodeGeneratorMIPS::DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map) { + return map->GetOrCreate( + value, + [this, value]() { return __ NewLiteral<uint32_t>(value); }); +} + Literal* CodeGeneratorMIPS::DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map) { return map->GetOrCreate( @@ -1022,6 +1119,26 @@ Literal* CodeGeneratorMIPS::DeduplicateMethodCodeLiteral(MethodReference target_ return DeduplicateMethodLiteral(target_method, &call_patches_); } +Literal* CodeGeneratorMIPS::DeduplicateBootImageStringLiteral(const DexFile& dex_file, + uint32_t string_index) { + return boot_image_string_patches_.GetOrCreate( + StringReference(&dex_file, string_index), + [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); +} + +Literal* CodeGeneratorMIPS::DeduplicateBootImageTypeLiteral(const DexFile& dex_file, + uint32_t type_index) { + return boot_image_type_patches_.GetOrCreate( + TypeReference(&dex_file, type_index), + [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); +} + +Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) { + bool needs_patch = GetCompilerOptions().GetIncludePatchInformation(); + Uint32ToLiteralMap* map = needs_patch ? 
&boot_image_address_patches_ : &uint32_literals_; + return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map); +} + void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) { MipsLabel done; Register card = AT; @@ -1067,6 +1184,15 @@ void CodeGeneratorMIPS::SetupBlockedRegisters() const { blocked_fpu_registers_[i] = true; } + if (GetGraph()->IsDebuggable()) { + // Stubs do not save callee-save floating point registers. If the graph + // is debuggable, we need to deal with these registers differently. For + // now, just block them. + for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { + blocked_fpu_registers_[kFpuCalleeSaves[i]] = true; + } + } + UpdateBlockedPairRegisters(); } @@ -3440,7 +3566,8 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field if (field_type == Primitive::kPrimLong) { locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimLong)); } else { - locations->SetOut(Location::RequiresFpuRegister()); + // Use Location::Any() to prevent situations when running out of available fp registers. + locations->SetOut(Location::Any()); // Need some temp core regs since FP results are returned in core registers Location reg = calling_convention.GetReturnLocation(Primitive::kPrimLong); locations->AddTemp(Location::RegisterLocation(reg.AsRegisterPairLow<Register>())); @@ -3505,11 +3632,23 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, IsDirectEntrypoint(kQuickA64Load)); CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>(); if (type == Primitive::kPrimDouble) { - // Need to move to FP regs since FP results are returned in core registers. - __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), - locations->Out().AsFpuRegister<FRegister>()); - __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(), - locations->Out().AsFpuRegister<FRegister>()); + // FP results are returned in core registers. Need to move them. + Location out = locations->Out(); + if (out.IsFpuRegister()) { + __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), out.AsFpuRegister<FRegister>()); + __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(), + out.AsFpuRegister<FRegister>()); + } else { + DCHECK(out.IsDoubleStackSlot()); + __ StoreToOffset(kStoreWord, + locations->GetTemp(1).AsRegister<Register>(), + SP, + out.GetStackIndex()); + __ StoreToOffset(kStoreWord, + locations->GetTemp(2).AsRegister<Register>(), + SP, + out.GetStackIndex() + 4); + } } } else { if (!Primitive::IsFloatingPointType(type)) { @@ -3568,7 +3707,8 @@ void LocationsBuilderMIPS::HandleFieldSet(HInstruction* instruction, const Field locations->SetInAt(1, Location::RegisterPairLocation( calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); } else { - locations->SetInAt(1, Location::RequiresFpuRegister()); + // Use Location::Any() to prevent situations when running out of available fp registers. + locations->SetInAt(1, Location::Any()); // Pass FP parameters in core registers. locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(3))); @@ -3627,10 +3767,28 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); if (type == Primitive::kPrimDouble) { // Pass FP parameters in core registers. 
- __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), - locations->InAt(1).AsFpuRegister<FRegister>()); - __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(), - locations->InAt(1).AsFpuRegister<FRegister>()); + Location in = locations->InAt(1); + if (in.IsFpuRegister()) { + __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), in.AsFpuRegister<FRegister>()); + __ MoveFromFpuHigh(locations->GetTemp(2).AsRegister<Register>(), + in.AsFpuRegister<FRegister>()); + } else if (in.IsDoubleStackSlot()) { + __ LoadFromOffset(kLoadWord, + locations->GetTemp(1).AsRegister<Register>(), + SP, + in.GetStackIndex()); + __ LoadFromOffset(kLoadWord, + locations->GetTemp(2).AsRegister<Register>(), + SP, + in.GetStackIndex() + 4); + } else { + DCHECK(in.IsConstant()); + DCHECK(in.GetConstant()->IsDoubleConstant()); + int64_t value = bit_cast<int64_t, double>(in.GetConstant()->AsDoubleConstant()->GetValue()); + __ LoadConst64(locations->GetTemp(2).AsRegister<Register>(), + locations->GetTemp(1).AsRegister<Register>(), + value); + } } codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pA64Store), instruction, @@ -3696,6 +3854,23 @@ void InstructionCodeGeneratorMIPS::VisitInstanceFieldSet(HInstanceFieldSet* inst HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetDexPc()); } +void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad( + HInstruction* instruction ATTRIBUTE_UNUSED, + Location root, + Register obj, + uint32_t offset) { + Register root_reg = root.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + UNIMPLEMENTED(FATAL) << "for read barrier"; + } else { + // Plain GC root load with no read barrier. + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + // Note that GC roots are not affected by heap poisoning, thus we + // do not have to unpoison `root_reg` here. + } +} + void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath; @@ -3861,16 +4036,80 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen } HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( - HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) { - // TODO: Implement other kinds. - return HLoadString::LoadKind::kDexCacheViaMethod; + HLoadString::LoadKind desired_string_load_kind) { + if (kEmitCompilerReadBarrier) { + UNIMPLEMENTED(FATAL) << "for read barrier"; + } + // We disable PC-relative load when there is an irreducible loop, as the optimization + // is incompatible with it. + bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); + bool fallback_load = has_irreducible_loops; + switch (desired_string_load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!GetCompilerOptions().GetCompilePic()); + break; + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + break; + case HLoadString::LoadKind::kBootImageAddress: + break; + case HLoadString::LoadKind::kDexCacheAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + fallback_load = false; + break; + case HLoadString::LoadKind::kDexCachePcRelative: + DCHECK(!Runtime::Current()->UseJitCompilation()); + // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods + // with irreducible loops. 
+ break; + case HLoadString::LoadKind::kDexCacheViaMethod: + fallback_load = false; + break; + } + if (fallback_load) { + desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + } + return desired_string_load_kind; } HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { - DCHECK_NE(desired_class_load_kind, HLoadClass::LoadKind::kReferrersClass); - // TODO: Implement other kinds. - return HLoadClass::LoadKind::kDexCacheViaMethod; + if (kEmitCompilerReadBarrier) { + UNIMPLEMENTED(FATAL) << "for read barrier"; + } + // We disable pc-relative load when there is an irreducible loop, as the optimization + // is incompatible with it. + bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); + bool fallback_load = has_irreducible_loops; + switch (desired_class_load_kind) { + case HLoadClass::LoadKind::kReferrersClass: + fallback_load = false; + break; + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + DCHECK(GetCompilerOptions().GetCompilePic()); + break; + case HLoadClass::LoadKind::kBootImageAddress: + break; + case HLoadClass::LoadKind::kDexCacheAddress: + DCHECK(Runtime::Current()->UseJitCompilation()); + fallback_load = false; + break; + case HLoadClass::LoadKind::kDexCachePcRelative: + DCHECK(!Runtime::Current()->UseJitCompilation()); + // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods + // with irreducible loops. + break; + case HLoadClass::LoadKind::kDexCacheViaMethod: + fallback_load = false; + break; + } + if (fallback_load) { + desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + } + return desired_class_load_kind; } Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, @@ -4107,11 +4346,40 @@ void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { } void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { - InvokeRuntimeCallingConvention calling_convention; - CodeGenerator::CreateLoadClassLocationSummary( - cls, - Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(V0)); + if (cls->NeedsAccessCheck()) { + InvokeRuntimeCallingConvention calling_convention; + CodeGenerator::CreateLoadClassLocationSummary( + cls, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Location::RegisterLocation(V0), + /* code_generator_supports_read_barrier */ false); // TODO: revisit this bool. + return; + } + + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + switch (load_kind) { + // We need an extra register for PC-relative literals on R2. + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + if (codegen_->GetInstructionSetFeatures().IsR6()) { + break; + } + FALLTHROUGH_INTENDED; + // We need an extra register for PC-relative dex cache accesses. 
+ case HLoadClass::LoadKind::kDexCachePcRelative: + case HLoadClass::LoadKind::kReferrersClass: + case HLoadClass::LoadKind::kDexCacheViaMethod: + locations->SetInAt(0, Location::RequiresRegister()); + break; + default: + break; + } + locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { @@ -4127,34 +4395,126 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { return; } - Register out = locations->Out().AsRegister<Register>(); - Register current_method = locations->InAt(0).AsRegister<Register>(); - if (cls->IsReferrersClass()) { - DCHECK(!cls->CanCallRuntime()); - DCHECK(!cls->MustGenerateClinitCheck()); - __ LoadFromOffset(kLoadWord, out, current_method, - ArtMethod::DeclaringClassOffset().Int32Value()); - } else { - __ LoadFromOffset(kLoadWord, out, current_method, - ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())); - - if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) { - DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( - cls, - cls, - cls->GetDexPc(), - cls->MustGenerateClinitCheck()); - codegen_->AddSlowPath(slow_path); - if (!cls->IsInDexCache()) { - __ Beqz(out, slow_path->GetEntryLabel()); - } - if (cls->MustGenerateClinitCheck()) { - GenerateClassInitializationCheck(slow_path, out); + HLoadClass::LoadKind load_kind = cls->GetLoadKind(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); + Register base_or_current_method_reg; + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (load_kind) { + // We need an extra register for PC-relative literals on R2. + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); + break; + // We need an extra register for PC-relative dex cache accesses. + case HLoadClass::LoadKind::kDexCachePcRelative: + case HLoadClass::LoadKind::kReferrersClass: + case HLoadClass::LoadKind::kDexCacheViaMethod: + base_or_current_method_reg = locations->InAt(0).AsRegister<Register>(); + break; + default: + base_or_current_method_reg = ZERO; + break; + } + + bool generate_null_check = false; + switch (load_kind) { + case HLoadClass::LoadKind::kReferrersClass: { + DCHECK(!cls->CanCallRuntime()); + DCHECK(!cls->MustGenerateClinitCheck()); + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad(cls, + out_loc, + base_or_current_method_reg, + ArtMethod::DeclaringClassOffset().Int32Value()); + break; + } + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!kEmitCompilerReadBarrier); + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), + cls->GetTypeIndex())); + break; + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(!kEmitCompilerReadBarrier); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + if (isR6) { + __ Bind(&info->high_label); + __ Bind(&info->pc_rel_label); + // Add a 32-bit offset to PC. 
+ __ Auipc(out, /* placeholder */ 0x1234); + __ Addiu(out, out, /* placeholder */ 0x5678); } else { - __ Bind(slow_path->GetExitLabel()); + __ Bind(&info->high_label); + __ Lui(out, /* placeholder */ 0x1234); + // We do not bind info->pc_rel_label here, we'll use the assembler's label + // for PC-relative literals and the base from HMipsComputeBaseMethodAddress. + __ Ori(out, out, /* placeholder */ 0x5678); + // Add a 32-bit offset to PC. + __ Addu(out, out, base_or_current_method_reg); } + break; + } + case HLoadClass::LoadKind::kBootImageAddress: { + DCHECK(!kEmitCompilerReadBarrier); + DCHECK_NE(cls->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageAddressLiteral(address)); + break; + } + case HLoadClass::LoadKind::kDexCacheAddress: { + DCHECK_NE(cls->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress()); + static_assert(sizeof(GcRoot<mirror::Class>) == 4u, "Expected GC root to be 4 bytes."); + DCHECK_ALIGNED(cls->GetAddress(), 4u); + int16_t offset = Low16Bits(address); + uint32_t base_address = address - offset; // This accounts for offset sign extension. + __ Lui(out, High16Bits(base_address)); + // /* GcRoot<mirror::Class> */ out = *(base_address + offset) + GenerateGcRootFieldLoad(cls, out_loc, out, offset); + generate_null_check = !cls->IsInDexCache(); + break; + } + case HLoadClass::LoadKind::kDexCachePcRelative: { + HMipsDexCacheArraysBase* base = cls->InputAt(0)->AsMipsDexCacheArraysBase(); + int32_t offset = + cls->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset; + // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset) + GenerateGcRootFieldLoad(cls, out_loc, base_or_current_method_reg, offset); + generate_null_check = !cls->IsInDexCache(); + break; + } + case HLoadClass::LoadKind::kDexCacheViaMethod: { + // /* GcRoot<mirror::Class>[] */ out = + // current_method.ptr_sized_fields_->dex_cache_resolved_types_ + __ LoadFromOffset(kLoadWord, + out, + base_or_current_method_reg, + ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value()); + // /* GcRoot<mirror::Class> */ out = out[type_index] + size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex()); + GenerateGcRootFieldLoad(cls, out_loc, out, offset); + generate_null_check = !cls->IsInDexCache(); + } + } + + if (generate_null_check || cls->MustGenerateClinitCheck()) { + DCHECK(cls->CanCallRuntime()); + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + codegen_->AddSlowPath(slow_path); + if (generate_null_check) { + __ Beqz(out, slow_path->GetEntryLabel()); + } + if (cls->MustGenerateClinitCheck()) { + GenerateClassInitializationCheck(slow_path, out); + } else { + __ Bind(slow_path->GetExitLabel()); } } } @@ -4183,21 +4543,132 @@ void InstructionCodeGeneratorMIPS::VisitClearException(HClearException* clear AT } void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { - LocationSummary::CallKind call_kind = load->NeedsEnvironment() + LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier) ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - locations->SetInAt(0, Location::RequiresRegister()); + HLoadString::LoadKind load_kind = load->GetLoadKind(); + switch (load_kind) { + // We need an extra register for PC-relative literals on R2. + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + if (codegen_->GetInstructionSetFeatures().IsR6()) { + break; + } + FALLTHROUGH_INTENDED; + // We need an extra register for PC-relative dex cache accesses. + case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kDexCacheViaMethod: + locations->SetInAt(0, Location::RequiresRegister()); + break; + default: + break; + } locations->SetOut(Location::RequiresRegister()); } void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { + HLoadString::LoadKind load_kind = load->GetLoadKind(); LocationSummary* locations = load->GetLocations(); - Register out = locations->Out().AsRegister<Register>(); - Register current_method = locations->InAt(0).AsRegister<Register>(); - __ LoadFromOffset(kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); - __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex())); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); + Register base_or_current_method_reg; + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + switch (load_kind) { + // We need an extra register for PC-relative literals on R2. + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); + break; + // We need an extra register for PC-relative dex cache accesses. + case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kDexCacheViaMethod: + base_or_current_method_reg = locations->InAt(0).AsRegister<Register>(); + break; + default: + base_or_current_method_reg = ZERO; + break; + } + + switch (load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + DCHECK(!kEmitCompilerReadBarrier); + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(), + load->GetStringIndex())); + return; // No dex cache slow path. + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(!kEmitCompilerReadBarrier); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + if (isR6) { + __ Bind(&info->high_label); + __ Bind(&info->pc_rel_label); + // Add a 32-bit offset to PC. + __ Auipc(out, /* placeholder */ 0x1234); + __ Addiu(out, out, /* placeholder */ 0x5678); + } else { + __ Bind(&info->high_label); + __ Lui(out, /* placeholder */ 0x1234); + // We do not bind info->pc_rel_label here, we'll use the assembler's label + // for PC-relative literals and the base from HMipsComputeBaseMethodAddress. + __ Ori(out, out, /* placeholder */ 0x5678); + // Add a 32-bit offset to PC. + __ Addu(out, out, base_or_current_method_reg); + } + return; // No dex cache slow path. 
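// Side note on the 0x1234/0x5678 placeholders in the Auipc/Addiu (R6) and
// Lui/Ori/Addu (R2) sequences above: they are rewritten once the final 32-bit
// delta from the anchor to the PC-relative target is known. A minimal sketch
// of that split (illustrative only; the real rewriting lives in the MIPS
// relative patcher, which is not part of this change). Addiu sign-extends its
// 16-bit immediate, so the R6 pair needs a carry adjustment; Ori zero-extends,
// so the R2 pair does not.
#include <cstdint>
static void SplitPcRelativeOffset(uint32_t delta, bool is_r6, uint16_t* high, uint16_t* low) {
  *low = static_cast<uint16_t>(delta);                              // Addiu/Ori immediate
  *high = is_r6 ? static_cast<uint16_t>((delta + 0x8000u) >> 16)    // Auipc immediate (carry-adjusted)
                : static_cast<uint16_t>(delta >> 16);               // Lui immediate
}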
+ } + case HLoadString::LoadKind::kBootImageAddress: { + DCHECK(!kEmitCompilerReadBarrier); + DCHECK_NE(load->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageAddressLiteral(address)); + return; // No dex cache slow path. + } + case HLoadString::LoadKind::kDexCacheAddress: { + DCHECK_NE(load->GetAddress(), 0u); + uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress()); + static_assert(sizeof(GcRoot<mirror::String>) == 4u, "Expected GC root to be 4 bytes."); + DCHECK_ALIGNED(load->GetAddress(), 4u); + int16_t offset = Low16Bits(address); + uint32_t base_address = address - offset; // This accounts for offset sign extension. + __ Lui(out, High16Bits(base_address)); + // /* GcRoot<mirror::String> */ out = *(base_address + offset) + GenerateGcRootFieldLoad(load, out_loc, out, offset); + break; + } + case HLoadString::LoadKind::kDexCachePcRelative: { + HMipsDexCacheArraysBase* base = load->InputAt(0)->AsMipsDexCacheArraysBase(); + int32_t offset = + load->GetDexCacheElementOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset; + // /* GcRoot<mirror::String> */ out = *(dex_cache_arrays_base + offset) + GenerateGcRootFieldLoad(load, out_loc, base_or_current_method_reg, offset); + break; + } + case HLoadString::LoadKind::kDexCacheViaMethod: { + // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ + GenerateGcRootFieldLoad(load, + out_loc, + base_or_current_method_reg, + ArtMethod::DeclaringClassOffset().Int32Value()); + // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_ + __ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value()); + // /* GcRoot<mirror::String> */ out = out[string_index] + GenerateGcRootFieldLoad(load, + out_loc, + out, + CodeGenerator::GetCacheOffset(load->GetStringIndex())); + break; + } + default: + LOG(FATAL) << "Unexpected load kind: " << load->GetLoadKind(); + UNREACHABLE(); + } if (!load->IsInDexCache()) { SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); @@ -5327,6 +5798,7 @@ void InstructionCodeGeneratorMIPS::VisitMipsComputeBaseMethodAddress( __ Nal(); // Grab the return address off RA. __ Move(reg, RA); + // TODO: Can we share this code with that of VisitMipsDexCacheArraysBase()? // Remember this offset (the obtained PC value) for later use with constant area. __ BindPcRelBaseLabel(); @@ -5357,6 +5829,7 @@ void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArra __ Ori(reg, reg, /* placeholder */ 0x5678); // Add a 32-bit offset to PC. __ Addu(reg, reg, RA); + // TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()? 
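// The kDexCacheAddress cases above split an absolute address into a Lui half
// and a 16-bit load offset; because the load sign-extends that offset, the
// Lui half is taken from (address - sign_extended_low_half). A small worked
// check of that identity (standalone sketch, hypothetical helper, not ART code):
#include <cassert>
#include <cstdint>
static void CheckDexCacheAddressSplit(uint32_t address /* e.g. 0x12349ABC, 4-byte aligned */) {
  int16_t offset = static_cast<int16_t>(address);   // low half, sign-extended by the LW
  uint32_t base_address = address - offset;         // e.g. 0x12350000, compensates for the extension
  uint32_t lui_value = base_address & 0xFFFF0000u;  // value Lui leaves in the register
  assert((base_address & 0xFFFFu) == 0u);           // low half folded entirely into the offset
  assert(lui_value + static_cast<int32_t>(offset) == address);
}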
} } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 08f74c04d1..63a0345c1c 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -18,11 +18,12 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_ #include "code_generator.h" -#include "dex/compiler_enums.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" +#include "string_reference.h" #include "utils/mips/assembler_mips.h" +#include "utils/type_reference.h" namespace art { namespace mips { @@ -226,6 +227,15 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { void HandleShift(HBinaryOperation* operation); void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); + // Generate a GC root reference load: + // + // root <- *(obj + offset) + // + // while honoring read barriers (if any). + void GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset); void GenerateIntCompare(IfCondition cond, LocationSummary* locations); void GenerateIntCompareAndBranch(IfCondition cond, LocationSummary* locations, @@ -298,6 +308,9 @@ class CodeGeneratorMIPS : public CodeGenerator { size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id); size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id); size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id); + void ClobberRA() { + clobbered_ra_ = true; + } void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE; void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE; @@ -383,7 +396,7 @@ class CodeGeneratorMIPS : public CodeGenerator { PcRelativePatchInfo(PcRelativePatchInfo&& other) = default; const DexFile& target_dex_file; - // Either the dex cache array element offset or the string index. + // Either the dex cache array element offset or the string/type index. 
uint32_t offset_or_index; // Label for the instruction loading the most significant half of the offset that's added to PC // to form the base address (the least significant half is loaded with the instruction that @@ -393,14 +406,27 @@ class CodeGeneratorMIPS : public CodeGenerator { MipsLabel pc_rel_label; }; + PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, uint32_t string_index); + PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, uint32_t type_index); PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); + Literal* DeduplicateBootImageStringLiteral(const DexFile& dex_file, uint32_t string_index); + Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index); + Literal* DeduplicateBootImageAddressLiteral(uint32_t address); private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); + using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>; - + using BootStringToLiteralMap = ArenaSafeMap<StringReference, + Literal*, + StringReferenceValueComparator>; + using BootTypeToLiteralMap = ArenaSafeMap<TypeReference, + Literal*, + TypeReferenceValueComparator>; + + Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map); Literal* DeduplicateMethodAddressLiteral(MethodReference target_method); Literal* DeduplicateMethodCodeLiteral(MethodReference target_method); @@ -417,11 +443,27 @@ class CodeGeneratorMIPS : public CodeGenerator { MipsAssembler assembler_; const MipsInstructionSetFeatures& isa_features_; + // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. + Uint32ToLiteralMap uint32_literals_; // Method patch info, map MethodReference to a literal for method address and method code. MethodToLiteralMap method_patches_; MethodToLiteralMap call_patches_; // PC-relative patch info for each HMipsDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; + // Deduplication map for boot string literals for kBootImageLinkTimeAddress. + BootStringToLiteralMap boot_image_string_patches_; + // PC-relative String patch info. + ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // Deduplication map for boot type literals for kBootImageLinkTimeAddress. + BootTypeToLiteralMap boot_image_type_patches_; + // PC-relative type patch info. + ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + // Deduplication map for patchable boot image addresses. + Uint32ToLiteralMap boot_image_address_patches_; + + // PC-relative loads on R2 clobber RA, which may need to be preserved explicitly in leaf methods. + // This is a flag set by pc_relative_fixups_mips and dex_cache_array_fixups_mips optimizations. 
+ bool clobbered_ra_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS); }; diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 4b462cc800..197f86b22b 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -18,7 +18,6 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_ #include "code_generator.h" -#include "dex/compiler_enums.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 12901724e7..39ea7d53a6 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -19,7 +19,6 @@ #include "arch/x86/instruction_set_features_x86.h" #include "code_generator.h" -#include "dex/compiler_enums.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index cf92d68c64..fbb78bc5f7 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -19,7 +19,6 @@ #include "arch/x86_64/instruction_set_features_x86_64.h" #include "code_generator.h" -#include "dex/compiler_enums.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc index 0f42d9ce0f..19bab08eb4 100644 --- a/compiler/optimizing/dex_cache_array_fixups_mips.cc +++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc @@ -14,6 +14,7 @@ * limitations under the License. */ +#include "code_generator_mips.h" #include "dex_cache_array_fixups_mips.h" #include "base/arena_containers.h" @@ -27,8 +28,9 @@ namespace mips { */ class DexCacheArrayFixupsVisitor : public HGraphVisitor { public: - explicit DexCacheArrayFixupsVisitor(HGraph* graph) + explicit DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen) : HGraphVisitor(graph), + codegen_(down_cast<CodeGeneratorMIPS*>(codegen)), dex_cache_array_bases_(std::less<const DexFile*>(), // Attribute memory use to code generator. graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} @@ -41,9 +43,45 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { HMipsDexCacheArraysBase* base = entry.second; base->MoveBeforeFirstUserAndOutOfLoops(); } + // Computing the dex cache base for PC-relative accesses will clobber RA with + // the NAL instruction on R2. Take a note of this before generating the method + // entry. + if (!dex_cache_array_bases_.empty() && !codegen_->GetInstructionSetFeatures().IsR6()) { + codegen_->ClobberRA(); + } } private: + void VisitLoadClass(HLoadClass* load_class) OVERRIDE { + // If this is a load with PC-relative access to the dex cache types array, + // we need to add the dex cache arrays base as the special input. + if (load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCachePcRelative) { + // Initialize base for target dex file if needed. + const DexFile& dex_file = load_class->GetDexFile(); + HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file); + // Update the element offset in base. + DexCacheArraysLayout layout(kMipsPointerSize, &dex_file); + base->UpdateElementOffset(layout.TypeOffset(load_class->GetTypeIndex())); + // Add the special argument base to the load. 
+ load_class->AddSpecialInput(base); + } + } + + void VisitLoadString(HLoadString* load_string) OVERRIDE { + // If this is a load with PC-relative access to the dex cache strings array, + // we need to add the dex cache arrays base as the special input. + if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) { + // Initialize base for target dex file if needed. + const DexFile& dex_file = load_string->GetDexFile(); + HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file); + // Update the element offset in base. + DexCacheArraysLayout layout(kMipsPointerSize, &dex_file); + base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex())); + // Add the special argument base to the load. + load_string->AddSpecialInput(base); + } + } + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { // If this is an invoke with PC-relative access to the dex cache methods array, // we need to add the dex cache arrays base as the special input. @@ -74,6 +112,8 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor { }); } + CodeGeneratorMIPS* codegen_; + using DexCacheArraysBaseMap = ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>; DexCacheArraysBaseMap dex_cache_array_bases_; @@ -85,7 +125,7 @@ void DexCacheArrayFixups::Run() { // that can be live-in at the irreducible loop header. return; } - DexCacheArrayFixupsVisitor visitor(graph_); + DexCacheArrayFixupsVisitor visitor(graph_, codegen_); visitor.VisitInsertionOrder(); visitor.MoveBasesIfNeeded(); } diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h index c8def2842e..21056e130a 100644 --- a/compiler/optimizing/dex_cache_array_fixups_mips.h +++ b/compiler/optimizing/dex_cache_array_fixups_mips.h @@ -21,14 +21,21 @@ #include "optimization.h" namespace art { + +class CodeGenerator; + namespace mips { class DexCacheArrayFixups : public HOptimization { public: - DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, "dex_cache_array_fixups_mips", stats) {} + DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, "dex_cache_array_fixups_mips", stats), + codegen_(codegen) {} void Run() OVERRIDE; + + private: + CodeGenerator* codegen_; }; } // namespace mips diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index afac5f9cf1..e5dab569fd 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -19,6 +19,7 @@ #include "art_method-inl.h" #include "bytecode_utils.h" #include "class_linker.h" +#include "dex_instruction-inl.h" #include "driver/compiler_options.h" #include "scoped_thread_state_change.h" diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 9cfc065da6..517cf76831 100644 --- a/compiler/optimizing/instruction_builder.h +++ b/compiler/optimizing/instruction_builder.h @@ -30,6 +30,8 @@ namespace art { +class Instruction; + class HInstructionBuilder : public ValueObject { public: HInstructionBuilder(HGraph* graph, diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index d557f42968..2808e1b5fc 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2632,4 +2632,23 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) { } } +std::ostream& operator<<(std::ostream& os, const 
MemBarrierKind& kind) { + switch (kind) { + case MemBarrierKind::kAnyStore: + return os << "AnyStore"; + case MemBarrierKind::kLoadAny: + return os << "LoadAny"; + case MemBarrierKind::kStoreStore: + return os << "StoreStore"; + case MemBarrierKind::kAnyAny: + return os << "AnyAny"; + case MemBarrierKind::kNTStoreStore: + return os << "NTStoreStore"; + + default: + LOG(FATAL) << "Unknown MemBarrierKind: " << static_cast<int>(kind); + UNREACHABLE(); + } +} + } // namespace art diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 23ac457568..dfa8276651 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -25,7 +25,6 @@ #include "base/arena_containers.h" #include "base/arena_object.h" #include "base/stl_util.h" -#include "dex/compiler_enums.h" #include "dex_file.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "handle.h" @@ -5626,9 +5625,12 @@ inline uint32_t HLoadClass::GetDexCacheElementOffset() const { // Note: defined outside class to see operator<<(., HLoadClass::LoadKind). inline void HLoadClass::AddSpecialInput(HInstruction* special_input) { - // The special input is used for PC-relative loads on some architectures. + // The special input is used for PC-relative loads on some architectures, + // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind(); + GetLoadKind() == LoadKind::kDexCachePcRelative || + GetLoadKind() == LoadKind::kBootImageLinkTimeAddress || + GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind(); DCHECK(special_input_.GetInstruction() == nullptr); special_input_ = HUserRecord<HInstruction*>(special_input); special_input->AddUseAt(this, 0); @@ -5836,9 +5838,12 @@ inline uint32_t HLoadString::GetDexCacheElementOffset() const { // Note: defined outside class to see operator<<(., HLoadString::LoadKind). inline void HLoadString::AddSpecialInput(HInstruction* special_input) { - // The special input is used for PC-relative loads on some architectures. + // The special input is used for PC-relative loads on some architectures, + // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kDexCachePcRelative) << GetLoadKind(); + GetLoadKind() == LoadKind::kDexCachePcRelative || + GetLoadKind() == LoadKind::kBootImageLinkTimeAddress || + GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind(); // HLoadString::GetInputRecords() returns an empty array at this point, // so use the GetInputRecords() from the base class to set the input record. DCHECK(special_input_.GetInstruction() == nullptr); @@ -6305,6 +6310,32 @@ class HCheckCast FINAL : public HTemplateInstruction<2> { DISALLOW_COPY_AND_ASSIGN(HCheckCast); }; +/** + * @brief Memory barrier types (see "The JSR-133 Cookbook for Compiler Writers"). + * @details We define the combined barrier types that are actually required + * by the Java Memory Model, rather than using exactly the terminology from + * the JSR-133 cookbook. These should, in many cases, be replaced by acquire/release + * primitives. Note that the JSR-133 cookbook generally does not deal with + * store atomicity issues, and the recipes there are not always entirely sufficient. + * The current recipe is as follows: + * -# Use AnyStore ~= (LoadStore | StoreStore) ~= release barrier before volatile store. + * -# Use AnyAny barrier after volatile store. 
(StoreLoad is as expensive.) + * -# Use LoadAny barrier ~= (LoadLoad | LoadStore) ~= acquire barrier after each volatile load. + * -# Use StoreStore barrier after all stores but before return from any constructor whose + * class has final fields. + * -# Use NTStoreStore to order non-temporal stores with respect to all later + * store-to-memory instructions. Only generated together with non-temporal stores. + */ +enum MemBarrierKind { + kAnyStore, + kLoadAny, + kStoreStore, + kAnyAny, + kNTStoreStore, + kLastBarrierKind = kNTStoreStore +}; +std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind); + class HMemoryBarrier FINAL : public HTemplateInstruction<0> { public: explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc) diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index aedfcb42aa..d5b0d77fe5 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -504,7 +504,7 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, mips::PcRelativeFixups* pc_relative_fixups = new (arena) mips::PcRelativeFixups(graph, codegen, stats); mips::DexCacheArrayFixups* dex_cache_array_fixups = - new (arena) mips::DexCacheArrayFixups(graph, stats); + new (arena) mips::DexCacheArrayFixups(graph, codegen, stats); HOptimization* mips_optimizations[] = { pc_relative_fixups, dex_cache_array_fixups diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index ba405cdb69..c6acc45581 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -37,6 +37,10 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { // entry block) and relieve some pressure on the register allocator // while avoiding recalculation of the base in a loop. base_->MoveBeforeFirstUserAndOutOfLoops(); + // Computing the base for PC-relative literals will clobber RA with + // the NAL instruction on R2. Take a note of this before generating + // the method entry. + codegen_->ClobberRA(); } } @@ -58,6 +62,36 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { DCHECK(base_ != nullptr); } + void VisitLoadClass(HLoadClass* load_class) OVERRIDE { + HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); + switch (load_kind) { + case HLoadClass::LoadKind::kBootImageLinkTimeAddress: + case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + // Add a base register for PC-relative literals on R2. + InitializePCRelativeBasePointer(); + load_class->AddSpecialInput(base_); + break; + default: + break; + } + } + + void VisitLoadString(HLoadString* load_string) OVERRIDE { + HLoadString::LoadKind load_kind = load_string->GetLoadKind(); + switch (load_kind) { + case HLoadString::LoadKind::kBootImageLinkTimeAddress: + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + // Add a base register for PC-relative literals on R2. + InitializePCRelativeBasePointer(); + load_string->AddSpecialInput(base_); + break; + default: + break; + } + } + void HandleInvoke(HInvoke* invoke) { // If this is an invoke-static/-direct with PC-relative dex cache array // addressing, we need the PC-relative address base. @@ -77,7 +111,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { // method pointer from the invoke. 
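// Illustration of the MemBarrierKind recipe documented above (a sketch, not the
// actual ART builder/code-generator path; 'arena' and 'dex_pc' are assumed to
// be in scope): the HIR nodes a volatile store would be bracketed with.
HMemoryBarrier* release_barrier = new (arena) HMemoryBarrier(kAnyStore, dex_pc);  // before the store
HMemoryBarrier* full_barrier = new (arena) HMemoryBarrier(kAnyAny, dex_pc);       // after the store
// A volatile load instead gets a single kLoadAny (acquire) barrier after it,
// and a constructor that writes final fields ends with a kStoreStore barrier.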
if (invoke_static_or_direct->HasCurrentMethodInput()) { DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache()); - CHECK(!has_extra_input); // TODO: review this. + CHECK(!has_extra_input); return; } @@ -116,7 +150,6 @@ void PcRelativeFixups::Run() { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_); if (mips_codegen->GetInstructionSetFeatures().IsR6()) { // Do nothing for R6 because it has PC-relative addressing. - // TODO: review. Move this check into RunArchOptimizations()? return; } if (graph_->HasIrreducibleLoops()) { diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc index ebaf1c0cab..608b3bc23c 100644 --- a/compiler/utils/mips/assembler_mips.cc +++ b/compiler/utils/mips/assembler_mips.cc @@ -2024,6 +2024,10 @@ void MipsAssembler::BindPcRelBaseLabel() { Bind(&pc_rel_base_label_); } +uint32_t MipsAssembler::GetPcRelBaseLabelLocation() const { + return GetLabelLocation(&pc_rel_base_label_); +} + void MipsAssembler::FinalizeLabeledBranch(MipsLabel* label) { uint32_t length = branches_.back().GetLength(); if (!label->IsBound()) { diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h index 1f7781fef9..8367e68ebc 100644 --- a/compiler/utils/mips/assembler_mips.h +++ b/compiler/utils/mips/assembler_mips.h @@ -646,6 +646,9 @@ class MipsAssembler FINAL : public Assembler { // The assembler then computes literal offsets relative to this label. void BindPcRelBaseLabel(); + // Returns the location of the label bound with BindPcRelBaseLabel(). + uint32_t GetPcRelBaseLabelLocation() const; + // Note that PC-relative literal loads are handled as pseudo branches because they need very // similar relocation and may similarly expand in size to accomodate for larger offsets relative // to PC. diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc index 966587d772..0e2a6720ae 100644 --- a/runtime/arch/arm/entrypoints_init_arm.cc +++ b/runtime/arch/arm/entrypoints_init_arm.cc @@ -152,8 +152,6 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pReadBarrierMarkReg27 = nullptr; qpoints->pReadBarrierMarkReg28 = nullptr; qpoints->pReadBarrierMarkReg29 = nullptr; - qpoints->pReadBarrierMarkReg30 = nullptr; - qpoints->pReadBarrierMarkReg31 = nullptr; qpoints->pReadBarrierSlow = artReadBarrierSlow; qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow; } diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc index 2e5f5ad89f..cc5bf29609 100644 --- a/runtime/arch/arm64/entrypoints_init_arm64.cc +++ b/runtime/arch/arm64/entrypoints_init_arm64.cc @@ -124,6 +124,15 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { // Read barrier. qpoints->pReadBarrierJni = ReadBarrierJni; + // ARM64 is the architecture with the largest number of core + // registers (32) that supports the read barrier configuration. + // Because registers 30 (LR) and 31 (SP/XZR) cannot be used to pass + // arguments, only define ReadBarrierMarkRegX entrypoints for the + // first 30 registers. This limitation is not a problem on other + // supported architectures (ARM, x86 and x86-64) either, as they + // have less core registers (resp. 16, 8 and 16). (We may have to + // revise that design choice if read barrier support is added for + // MIPS and/or MIPS64.) 
qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00; qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01; qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02; @@ -154,8 +163,6 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pReadBarrierMarkReg27 = art_quick_read_barrier_mark_reg27; qpoints->pReadBarrierMarkReg28 = art_quick_read_barrier_mark_reg28; qpoints->pReadBarrierMarkReg29 = art_quick_read_barrier_mark_reg29; - qpoints->pReadBarrierMarkReg30 = nullptr; // Cannot use register 30 (LR) to pass arguments. - qpoints->pReadBarrierMarkReg31 = nullptr; // Cannot use register 31 (SP/XZR) to pass arguments. qpoints->pReadBarrierSlow = artReadBarrierSlow; qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow; }; diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc index 22efd199cf..09f8849e1a 100644 --- a/runtime/arch/mips/entrypoints_init_mips.cc +++ b/runtime/arch/mips/entrypoints_init_mips.cc @@ -376,12 +376,6 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pReadBarrierMarkReg29 = nullptr; static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg29), "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg30 = nullptr; - static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg30), - "Non-direct C stub marked direct."); - qpoints->pReadBarrierMarkReg31 = nullptr; - static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg31), - "Non-direct C stub marked direct."); qpoints->pReadBarrierSlow = artReadBarrierSlow; static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct."); qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow; diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc index b02edb6aba..34b0638332 100644 --- a/runtime/arch/mips64/entrypoints_init_mips64.cc +++ b/runtime/arch/mips64/entrypoints_init_mips64.cc @@ -129,8 +129,6 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pReadBarrierMarkReg27 = nullptr; qpoints->pReadBarrierMarkReg28 = nullptr; qpoints->pReadBarrierMarkReg29 = nullptr; - qpoints->pReadBarrierMarkReg30 = nullptr; - qpoints->pReadBarrierMarkReg31 = nullptr; qpoints->pReadBarrierSlow = artReadBarrierSlow; qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow; }; diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc index 4e9756c54e..bdf11dab9a 100644 --- a/runtime/arch/x86/entrypoints_init_x86.cc +++ b/runtime/arch/x86/entrypoints_init_x86.cc @@ -117,8 +117,6 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pReadBarrierMarkReg27 = nullptr; qpoints->pReadBarrierMarkReg28 = nullptr; qpoints->pReadBarrierMarkReg29 = nullptr; - qpoints->pReadBarrierMarkReg30 = nullptr; - qpoints->pReadBarrierMarkReg31 = nullptr; qpoints->pReadBarrierSlow = art_quick_read_barrier_slow; qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow; }; diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc index c2e3023b87..42b9699122 100644 --- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc +++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc @@ -131,8 +131,6 @@ void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) { qpoints->pReadBarrierMarkReg27 = nullptr; qpoints->pReadBarrierMarkReg28 = nullptr; 
qpoints->pReadBarrierMarkReg29 = nullptr; - qpoints->pReadBarrierMarkReg30 = nullptr; - qpoints->pReadBarrierMarkReg31 = nullptr; qpoints->pReadBarrierSlow = art_quick_read_barrier_slow; qpoints->pReadBarrierForRootSlow = art_quick_read_barrier_for_root_slow; #endif // __APPLE__ diff --git a/runtime/asm_support.h b/runtime/asm_support.h index e1f00ebdc7..b7df90d59d 100644 --- a/runtime/asm_support.h +++ b/runtime/asm_support.h @@ -84,7 +84,7 @@ ADD_TEST_EQ(THREAD_SELF_OFFSET, art::Thread::SelfOffset<__SIZEOF_POINTER__>().Int32Value()) // Offset of field Thread::tlsPtr_.thread_local_objects. -#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 199 * __SIZEOF_POINTER__) +#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_CARD_TABLE_OFFSET + 197 * __SIZEOF_POINTER__) ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET, art::Thread::ThreadLocalObjectsOffset<__SIZEOF_POINTER__>().Int32Value()) // Offset of field Thread::tlsPtr_.thread_local_pos. diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index e0ec68ee87..07f0394773 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -194,8 +194,6 @@ V(ReadBarrierMarkReg27, mirror::Object*, mirror::Object*) \ V(ReadBarrierMarkReg28, mirror::Object*, mirror::Object*) \ V(ReadBarrierMarkReg29, mirror::Object*, mirror::Object*) \ - V(ReadBarrierMarkReg30, mirror::Object*, mirror::Object*) \ - V(ReadBarrierMarkReg31, mirror::Object*, mirror::Object*) \ V(ReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t) \ V(ReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*) diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index ffe41099ea..e3203dc14f 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -383,11 +383,7 @@ class EntrypointsOrderTest : public CommonRuntimeTest { sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg28, pReadBarrierMarkReg29, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg29, pReadBarrierMarkReg30, - sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg30, pReadBarrierMarkReg31, - sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg31, pReadBarrierSlow, sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierMarkReg29, pReadBarrierSlow, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierSlow, pReadBarrierForRootSlow, sizeof(void*)); diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc index d413a50534..bd07bb8711 100644 --- a/runtime/gc/collector/concurrent_copying.cc +++ b/runtime/gc/collector/concurrent_copying.cc @@ -1714,12 +1714,19 @@ class ConcurrentCopying::RefFieldsVisitor { // Scan ref fields of an object. inline void ConcurrentCopying::Scan(mirror::Object* to_ref) { + if (kIsDebugBuild) { + // Avoid all read barriers during visit references to help performance. + Thread::Current()->ModifyDebugDisallowReadBarrier(1); + } DCHECK(!region_space_->IsInFromSpace(to_ref)); DCHECK_EQ(Thread::Current(), thread_running_gc_); RefFieldsVisitor visitor(this); // Disable the read barrier for a performance reason. to_ref->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>( visitor, visitor); + if (kIsDebugBuild) { + Thread::Current()->ModifyDebugDisallowReadBarrier(-1); + } } // Process a field. 
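// Note on the asm_support.h change above: removing pReadBarrierMarkReg30 and
// pReadBarrierMarkReg31 drops two pointer-sized slots from QuickEntryPoints,
// which is why the hand-maintained THREAD_LOCAL_OBJECTS_OFFSET factor moves
// from 199 to 197 pointer sizes past THREAD_CARD_TABLE_OFFSET (sketch of the
// bookkeeping; the ADD_TEST_EQ above still verifies it against the real layout):
constexpr int kOldThreadLocalObjectsSlot = 199;
constexpr int kRemovedMarkRegEntrypoints = 2;  // Reg30 and Reg31
static_assert(kOldThreadLocalObjectsSlot - kRemovedMarkRegEntrypoints == 197,
              "matches the updated THREAD_LOCAL_OBJECTS_OFFSET");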
@@ -1836,7 +1843,7 @@ void ConcurrentCopying::FillWithDummyObject(mirror::Object* dummy_obj, size_t by mirror::Class* int_array_class = mirror::IntArray::GetArrayClass(); CHECK(int_array_class != nullptr); AssertToSpaceInvariant(nullptr, MemberOffset(0), int_array_class); - size_t component_size = int_array_class->GetComponentSize(); + size_t component_size = int_array_class->GetComponentSize<kWithoutReadBarrier>(); CHECK_EQ(component_size, sizeof(int32_t)); size_t data_offset = mirror::Array::DataOffset(component_size).SizeValue(); if (data_offset > byte_size) { @@ -1849,13 +1856,14 @@ void ConcurrentCopying::FillWithDummyObject(mirror::Object* dummy_obj, size_t by } else { // Use an int array. dummy_obj->SetClass(int_array_class); - CHECK(dummy_obj->IsArrayInstance()); + CHECK((dummy_obj->IsArrayInstance<kVerifyNone, kWithoutReadBarrier>())); int32_t length = (byte_size - data_offset) / component_size; - dummy_obj->AsArray()->SetLength(length); - CHECK_EQ(dummy_obj->AsArray()->GetLength(), length) + mirror::Array* dummy_arr = dummy_obj->AsArray<kVerifyNone, kWithoutReadBarrier>(); + dummy_arr->SetLength(length); + CHECK_EQ(dummy_arr->GetLength(), length) << "byte_size=" << byte_size << " length=" << length << " component_size=" << component_size << " data_offset=" << data_offset; - CHECK_EQ(byte_size, dummy_obj->SizeOf()) + CHECK_EQ(byte_size, (dummy_obj->SizeOf<kVerifyNone, kWithoutReadBarrier>())) << "byte_size=" << byte_size << " length=" << length << " component_size=" << component_size << " data_offset=" << data_offset; } diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc index 6088a43ab1..62625c41b4 100644 --- a/runtime/gc/reference_queue.cc +++ b/runtime/gc/reference_queue.cc @@ -44,7 +44,9 @@ void ReferenceQueue::EnqueueReference(mirror::Reference* ref) { // 1 element cyclic queue, ie: Reference ref = ..; ref.pendingNext = ref; list_ = ref; } else { - mirror::Reference* head = list_->GetPendingNext(); + // The list is owned by the GC, everything that has been inserted must already be at least + // gray. + mirror::Reference* head = list_->GetPendingNext<kWithoutReadBarrier>(); DCHECK(head != nullptr); ref->SetPendingNext(head); } @@ -54,14 +56,14 @@ void ReferenceQueue::EnqueueReference(mirror::Reference* ref) { mirror::Reference* ReferenceQueue::DequeuePendingReference() { DCHECK(!IsEmpty()); - mirror::Reference* ref = list_->GetPendingNext(); + mirror::Reference* ref = list_->GetPendingNext<kWithoutReadBarrier>(); DCHECK(ref != nullptr); // Note: the following code is thread-safe because it is only called from ProcessReferences which // is single threaded. 
if (list_ == ref) { list_ = nullptr; } else { - mirror::Reference* next = ref->GetPendingNext(); + mirror::Reference* next = ref->GetPendingNext<kWithoutReadBarrier>(); list_->SetPendingNext(next); } ref->SetPendingNext(nullptr); diff --git a/runtime/interpreter/mterp/arm64/binopLit8.S b/runtime/interpreter/mterp/arm64/binopLit8.S index 326c657652..0b7c68a64b 100644 --- a/runtime/interpreter/mterp/arm64/binopLit8.S +++ b/runtime/interpreter/mterp/arm64/binopLit8.S @@ -13,7 +13,7 @@ * shl-int/lit8, shr-int/lit8, ushr-int/lit8 */ /* binop/lit8 vAA, vBB, #+CC */ - FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC + FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC) lsr w9, wINST, #8 // w9<- AA and w2, w3, #255 // w2<- BB GET_VREG w0, w2 // w0<- vBB diff --git a/runtime/interpreter/mterp/arm64/header.S b/runtime/interpreter/mterp/arm64/header.S index 4257200bb7..c791eb5ec9 100644 --- a/runtime/interpreter/mterp/arm64/header.S +++ b/runtime/interpreter/mterp/arm64/header.S @@ -272,6 +272,14 @@ codes. .endm /* + * Get the 32-bit value from a Dalvik register and sign-extend to 64-bit. + * Used to avoid an extra instruction in int-to-long. + */ +.macro GET_VREG_S reg, vreg + ldrsw \reg, [xFP, \vreg, uxtw #2] +.endm + +/* * Convert a virtual register index into an address. */ .macro VREG_INDEX_TO_ADDR reg, vreg diff --git a/runtime/interpreter/mterp/arm64/op_const_16.S b/runtime/interpreter/mterp/arm64/op_const_16.S index 27f527313d..f0e81923c5 100644 --- a/runtime/interpreter/mterp/arm64/op_const_16.S +++ b/runtime/interpreter/mterp/arm64/op_const_16.S @@ -1,5 +1,5 @@ /* const/16 vAA, #+BBBB */ - FETCH_S w0, 1 // w0<- ssssBBBB (sign-extended + FETCH_S w0, 1 // w0<- ssssBBBB (sign-extended) lsr w3, wINST, #8 // w3<- AA FETCH_ADVANCE_INST 2 // advance xPC, load wINST SET_VREG w0, w3 // vAA<- w0 diff --git a/runtime/interpreter/mterp/arm64/op_const_4.S b/runtime/interpreter/mterp/arm64/op_const_4.S index 04cd4f81b6..9a36115288 100644 --- a/runtime/interpreter/mterp/arm64/op_const_4.S +++ b/runtime/interpreter/mterp/arm64/op_const_4.S @@ -1,8 +1,7 @@ /* const/4 vA, #+B */ - lsl w1, wINST, #16 // w1<- Bxxx0000 + sbfx w1, wINST, #12, #4 // w1<- sssssssB ubfx w0, wINST, #8, #4 // w0<- A FETCH_ADVANCE_INST 1 // advance xPC, load wINST - asr w1, w1, #28 // w1<- sssssssB (sign-extended) GET_INST_OPCODE ip // ip<- opcode from xINST SET_VREG w1, w0 // fp[A]<- w1 GOTO_OPCODE ip // execute next instruction diff --git a/runtime/interpreter/mterp/arm64/op_const_high16.S b/runtime/interpreter/mterp/arm64/op_const_high16.S index dd51ce1ebe..3a9edfff91 100644 --- a/runtime/interpreter/mterp/arm64/op_const_high16.S +++ b/runtime/interpreter/mterp/arm64/op_const_high16.S @@ -1,5 +1,5 @@ /* const/high16 vAA, #+BBBB0000 */ - FETCH w0, 1 // r0<- 0000BBBB (zero-extended + FETCH w0, 1 // r0<- 0000BBBB (zero-extended) lsr w3, wINST, #8 // r3<- AA lsl w0, w0, #16 // r0<- BBBB0000 FETCH_ADVANCE_INST 2 // advance rPC, load rINST diff --git a/runtime/interpreter/mterp/arm64/op_if_eqz.S b/runtime/interpreter/mterp/arm64/op_if_eqz.S index 1d3202e16c..47c1dee8cf 100644 --- a/runtime/interpreter/mterp/arm64/op_if_eqz.S +++ b/runtime/interpreter/mterp/arm64/op_if_eqz.S @@ -1 +1 @@ -%include "arm64/zcmp.S" { "condition":"eq" } +%include "arm64/zcmp.S" { "compare":"0", "branch":"cbz w2," } diff --git a/runtime/interpreter/mterp/arm64/op_if_gez.S b/runtime/interpreter/mterp/arm64/op_if_gez.S index 8e3abd3557..087e094a76 100644 --- a/runtime/interpreter/mterp/arm64/op_if_gez.S +++ b/runtime/interpreter/mterp/arm64/op_if_gez.S @@ 
-1 +1 @@ -%include "arm64/zcmp.S" { "condition":"ge" } +%include "arm64/zcmp.S" { "compare":"0", "branch":"tbz w2, #31," } diff --git a/runtime/interpreter/mterp/arm64/op_if_gtz.S b/runtime/interpreter/mterp/arm64/op_if_gtz.S index a4f2f6b661..476b265431 100644 --- a/runtime/interpreter/mterp/arm64/op_if_gtz.S +++ b/runtime/interpreter/mterp/arm64/op_if_gtz.S @@ -1 +1 @@ -%include "arm64/zcmp.S" { "condition":"gt" } +%include "arm64/zcmp.S" { "branch":"b.gt" } diff --git a/runtime/interpreter/mterp/arm64/op_if_lez.S b/runtime/interpreter/mterp/arm64/op_if_lez.S index c1425fddcf..2717a60ebf 100644 --- a/runtime/interpreter/mterp/arm64/op_if_lez.S +++ b/runtime/interpreter/mterp/arm64/op_if_lez.S @@ -1 +1 @@ -%include "arm64/zcmp.S" { "condition":"le" } +%include "arm64/zcmp.S" { "branch":"b.le" } diff --git a/runtime/interpreter/mterp/arm64/op_if_ltz.S b/runtime/interpreter/mterp/arm64/op_if_ltz.S index 03cd3d6973..86089c10ba 100644 --- a/runtime/interpreter/mterp/arm64/op_if_ltz.S +++ b/runtime/interpreter/mterp/arm64/op_if_ltz.S @@ -1 +1 @@ -%include "arm64/zcmp.S" { "condition":"lt" } +%include "arm64/zcmp.S" { "compare":"0", "branch":"tbnz w2, #31," } diff --git a/runtime/interpreter/mterp/arm64/op_if_nez.S b/runtime/interpreter/mterp/arm64/op_if_nez.S index 21e1bc2170..efacc88806 100644 --- a/runtime/interpreter/mterp/arm64/op_if_nez.S +++ b/runtime/interpreter/mterp/arm64/op_if_nez.S @@ -1 +1 @@ -%include "arm64/zcmp.S" { "condition":"ne" } +%include "arm64/zcmp.S" { "compare":"0", "branch":"cbnz w2," } diff --git a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S index 30b30c2d4d..e9388e477d 100644 --- a/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S +++ b/runtime/interpreter/mterp/arm64/op_iget_wide_quick.S @@ -4,8 +4,7 @@ GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A cbz w3, common_errNullObject // object was null - add x4, x3, x4 // create direct pointer - ldr x0, [x4] + ldr x0, [x3, x4] // x0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load wINST SET_VREG_WIDE x0, w2 GET_INST_OPCODE ip // extract opcode from wINST diff --git a/runtime/interpreter/mterp/arm64/op_int_to_long.S b/runtime/interpreter/mterp/arm64/op_int_to_long.S index 35830f3881..45e31124ff 100644 --- a/runtime/interpreter/mterp/arm64/op_int_to_long.S +++ b/runtime/interpreter/mterp/arm64/op_int_to_long.S @@ -1 +1,8 @@ -%include "arm64/funopWider.S" {"instr":"sxtw x0, w0", "srcreg":"w0", "tgtreg":"x0"} + /* int-to-long vA, vB */ + lsr w3, wINST, #12 // w3<- B + ubfx w4, wINST, #8, #4 // w4<- A + GET_VREG_S x0, w3 // x0<- sign_extend(fp[B]) + FETCH_ADVANCE_INST 1 // advance rPC, load wINST + GET_INST_OPCODE ip // extract opcode from wINST + SET_VREG_WIDE x0, w4 // fp[A]<- x0 + GOTO_OPCODE ip // jump to next instruction diff --git a/runtime/interpreter/mterp/arm64/op_iput_quick.S b/runtime/interpreter/mterp/arm64/op_iput_quick.S index 2afc51beed..e95da76283 100644 --- a/runtime/interpreter/mterp/arm64/op_iput_quick.S +++ b/runtime/interpreter/mterp/arm64/op_iput_quick.S @@ -5,7 +5,6 @@ FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- fp[B], the object pointer ubfx w2, wINST, #8, #4 // w2<- A - cmp w3, #0 // check object for null cbz w3, common_errNullObject // object was null GET_VREG w0, w2 // w0<- fp[A] FETCH_ADVANCE_INST 2 // advance rPC, load rINST diff --git a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S index 566e2bfdd4..6cec3633a6 
100644 --- a/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S +++ b/runtime/interpreter/mterp/arm64/op_iput_wide_quick.S @@ -6,7 +6,6 @@ cbz w2, common_errNullObject // object was null GET_VREG_WIDE x0, w0 // x0-< fp[A] FETCH_ADVANCE_INST 2 // advance rPC, load wINST - add x1, x2, x3 // create a direct pointer - str x0, [x1] + str x0, [x2, x3] // obj.field<- x0 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction diff --git a/runtime/interpreter/mterp/arm64/op_long_to_int.S b/runtime/interpreter/mterp/arm64/op_long_to_int.S index 360a69b908..73f58d8967 100644 --- a/runtime/interpreter/mterp/arm64/op_long_to_int.S +++ b/runtime/interpreter/mterp/arm64/op_long_to_int.S @@ -1 +1,2 @@ -%include "arm64/funopNarrower.S" {"instr":"", "srcreg":"x0", "tgtreg":"w0"} +/* we ignore the high word, making this equivalent to a 32-bit reg move */ +%include "arm64/op_move.S" diff --git a/runtime/interpreter/mterp/arm64/op_neg_double.S b/runtime/interpreter/mterp/arm64/op_neg_double.S index e9064c47ce..d77859d570 100644 --- a/runtime/interpreter/mterp/arm64/op_neg_double.S +++ b/runtime/interpreter/mterp/arm64/op_neg_double.S @@ -1 +1 @@ -%include "arm64/unopWide.S" {"preinstr":"mov x1, #0x8000000000000000", "instr":"add x0, x0, x1"} +%include "arm64/unopWide.S" {"instr":"eor x0, x0, #0x8000000000000000"} diff --git a/runtime/interpreter/mterp/arm64/op_neg_float.S b/runtime/interpreter/mterp/arm64/op_neg_float.S index 49d51afa5b..6652aec0ff 100644 --- a/runtime/interpreter/mterp/arm64/op_neg_float.S +++ b/runtime/interpreter/mterp/arm64/op_neg_float.S @@ -1 +1 @@ -%include "arm64/unop.S" {"preinstr":"mov w4, #0x80000000", "instr":"add w0, w0, w4"} +%include "arm64/unop.S" {"instr":"eor w0, w0, #0x80000000"} diff --git a/runtime/interpreter/mterp/arm64/unop.S b/runtime/interpreter/mterp/arm64/unop.S index 474a961837..e681968a9f 100644 --- a/runtime/interpreter/mterp/arm64/unop.S +++ b/runtime/interpreter/mterp/arm64/unop.S @@ -1,4 +1,3 @@ -%default {"preinstr":""} /* * Generic 32-bit unary operation. Provide an "instr" line that * specifies an instruction that performs "result = op w0". @@ -11,7 +10,6 @@ lsr w3, wINST, #12 // w3<- B GET_VREG w0, w3 // w0<- vB ubfx w9, wINST, #8, #4 // w9<- A - $preinstr // optional op; may set condition codes FETCH_ADVANCE_INST 1 // advance rPC, load rINST $instr // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST diff --git a/runtime/interpreter/mterp/arm64/unopWide.S b/runtime/interpreter/mterp/arm64/unopWide.S index 109302a128..6ee4f922e1 100644 --- a/runtime/interpreter/mterp/arm64/unopWide.S +++ b/runtime/interpreter/mterp/arm64/unopWide.S @@ -1,4 +1,4 @@ -%default {"instr":"sub x0, xzr, x0", "preinstr":""} +%default {"instr":"sub x0, xzr, x0"} /* * Generic 64-bit unary operation. Provide an "instr" line that * specifies an instruction that performs "result = op x0". @@ -10,7 +10,6 @@ ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE x0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - $preinstr $instr GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE x0, w4 diff --git a/runtime/interpreter/mterp/arm64/zcmp.S b/runtime/interpreter/mterp/arm64/zcmp.S index b303e6a13f..510a3c10cd 100644 --- a/runtime/interpreter/mterp/arm64/zcmp.S +++ b/runtime/interpreter/mterp/arm64/zcmp.S @@ -1,3 +1,4 @@ +%default { "compare":"1" } /* * Generic one-operand compare-and-branch operation. Provide a "condition" * fragment that specifies the comparison to perform. 
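// On the op_neg_float/op_neg_double rewrites above: IEEE-754 negation is just
// flipping the sign bit, so a single EOR with the sign-bit mask replaces the
// old constant-materialize-and-add pair. Equivalent C++ sketch of what the new
// instruction computes (hypothetical helper, for illustration only):
#include <cstdint>
#include <cstring>
static float NegateFloatBitwise(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  bits ^= 0x80000000u;                 // flip the sign bit, as 'eor w0, w0, #0x80000000' does
  std::memcpy(&f, &bits, sizeof(f));
  return f;                            // works for zeros, infinities and NaNs alike
}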
@@ -8,8 +9,10 @@ lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA FETCH_S wINST, 1 // w1<- branch offset, in code units + .if ${compare} cmp w2, #0 // compare (vA, 0) - b.${condition} MterpCommonTakenBranchNoFlags + .endif + ${branch} MterpCommonTakenBranchNoFlags cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? b.eq .L_check_not_taken_osr FETCH_ADVANCE_INST 2 diff --git a/runtime/interpreter/mterp/config_arm64 b/runtime/interpreter/mterp/config_arm64 index 57206d25ba..6427ead6f3 100644 --- a/runtime/interpreter/mterp/config_arm64 +++ b/runtime/interpreter/mterp/config_arm64 @@ -20,9 +20,6 @@ handler-style computed-goto handler-size 128 -# source for alternate entry stub -asm-alt-stub arm64/alt_stub.S - # file header and basic definitions import arm64/header.S @@ -295,5 +292,12 @@ op-start arm64 # op op_unused_ff FALLBACK op-end -# common subroutines for asm +# common subroutines for asm; we emit the footer before alternate +# entry stubs, so that TBZ/TBNZ from ops can reach targets in footer import arm64/footer.S + +# source for alternate entry stub +asm-alt-stub arm64/alt_stub.S + +# emit alternate entry stubs +alt-ops diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S index e318782c6d..de37e07e31 100644 --- a/runtime/interpreter/mterp/out/mterp_arm64.S +++ b/runtime/interpreter/mterp/out/mterp_arm64.S @@ -279,6 +279,14 @@ codes. .endm /* + * Get the 32-bit value from a Dalvik register and sign-extend to 64-bit. + * Used to avoid an extra instruction in int-to-long. + */ +.macro GET_VREG_S reg, vreg + ldrsw \reg, [xFP, \vreg, uxtw #2] +.endm + +/* * Convert a virtual register index into an address. */ .macro VREG_INDEX_TO_ADDR reg, vreg @@ -695,10 +703,9 @@ artMterpAsmInstructionStart = .L_op_nop .L_op_const_4: /* 0x12 */ /* File: arm64/op_const_4.S */ /* const/4 vA, #+B */ - lsl w1, wINST, #16 // w1<- Bxxx0000 + sbfx w1, wINST, #12, #4 // w1<- sssssssB ubfx w0, wINST, #8, #4 // w0<- A FETCH_ADVANCE_INST 1 // advance xPC, load wINST - asr w1, w1, #28 // w1<- sssssssB (sign-extended) GET_INST_OPCODE ip // ip<- opcode from xINST SET_VREG w1, w0 // fp[A]<- w1 GOTO_OPCODE ip // execute next instruction @@ -708,7 +715,7 @@ artMterpAsmInstructionStart = .L_op_nop .L_op_const_16: /* 0x13 */ /* File: arm64/op_const_16.S */ /* const/16 vAA, #+BBBB */ - FETCH_S w0, 1 // w0<- ssssBBBB (sign-extended + FETCH_S w0, 1 // w0<- ssssBBBB (sign-extended) lsr w3, wINST, #8 // w3<- AA FETCH_ADVANCE_INST 2 // advance xPC, load wINST SET_VREG w0, w3 // vAA<- w0 @@ -734,7 +741,7 @@ artMterpAsmInstructionStart = .L_op_nop .L_op_const_high16: /* 0x15 */ /* File: arm64/op_const_high16.S */ /* const/high16 vAA, #+BBBB0000 */ - FETCH w0, 1 // r0<- 0000BBBB (zero-extended + FETCH w0, 1 // r0<- 0000BBBB (zero-extended) lsr w3, wINST, #8 // r3<- AA lsl w0, w0, #16 // r0<- BBBB0000 FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -1465,8 +1472,10 @@ artMterpAsmInstructionStart = .L_op_nop lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA FETCH_S wINST, 1 // w1<- branch offset, in code units + .if 0 cmp w2, #0 // compare (vA, 0) - b.eq MterpCommonTakenBranchNoFlags + .endif + cbz w2, MterpCommonTakenBranchNoFlags cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? 
b.eq .L_check_not_taken_osr FETCH_ADVANCE_INST 2 @@ -1489,8 +1498,10 @@ artMterpAsmInstructionStart = .L_op_nop lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA FETCH_S wINST, 1 // w1<- branch offset, in code units + .if 0 cmp w2, #0 // compare (vA, 0) - b.ne MterpCommonTakenBranchNoFlags + .endif + cbnz w2, MterpCommonTakenBranchNoFlags cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? b.eq .L_check_not_taken_osr FETCH_ADVANCE_INST 2 @@ -1513,8 +1524,10 @@ artMterpAsmInstructionStart = .L_op_nop lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA FETCH_S wINST, 1 // w1<- branch offset, in code units + .if 0 cmp w2, #0 // compare (vA, 0) - b.lt MterpCommonTakenBranchNoFlags + .endif + tbnz w2, #31, MterpCommonTakenBranchNoFlags cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? b.eq .L_check_not_taken_osr FETCH_ADVANCE_INST 2 @@ -1537,8 +1550,10 @@ artMterpAsmInstructionStart = .L_op_nop lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA FETCH_S wINST, 1 // w1<- branch offset, in code units + .if 0 cmp w2, #0 // compare (vA, 0) - b.ge MterpCommonTakenBranchNoFlags + .endif + tbz w2, #31, MterpCommonTakenBranchNoFlags cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? b.eq .L_check_not_taken_osr FETCH_ADVANCE_INST 2 @@ -1561,7 +1576,9 @@ artMterpAsmInstructionStart = .L_op_nop lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA FETCH_S wINST, 1 // w1<- branch offset, in code units + .if 1 cmp w2, #0 // compare (vA, 0) + .endif b.gt MterpCommonTakenBranchNoFlags cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? b.eq .L_check_not_taken_osr @@ -1585,7 +1602,9 @@ artMterpAsmInstructionStart = .L_op_nop lsr w0, wINST, #8 // w0<- AA GET_VREG w2, w0 // w2<- vAA FETCH_S wINST, 1 // w1<- branch offset, in code units + .if 1 cmp w2, #0 // compare (vA, 0) + .endif b.le MterpCommonTakenBranchNoFlags cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? 
b.eq .L_check_not_taken_osr @@ -3192,7 +3211,6 @@ artMterpAsmInstructionStart = .L_op_nop lsr w3, wINST, #12 // w3<- B GET_VREG w0, w3 // w0<- vB ubfx w9, wINST, #8, #4 // w9<- A - // optional op; may set condition codes FETCH_ADVANCE_INST 1 // advance rPC, load rINST sub w0, wzr, w0 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST @@ -3218,7 +3236,6 @@ artMterpAsmInstructionStart = .L_op_nop lsr w3, wINST, #12 // w3<- B GET_VREG w0, w3 // w0<- vB ubfx w9, wINST, #8, #4 // w9<- A - // optional op; may set condition codes FETCH_ADVANCE_INST 1 // advance rPC, load rINST mvn w0, w0 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST @@ -3243,7 +3260,6 @@ artMterpAsmInstructionStart = .L_op_nop ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE x0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - sub x0, xzr, x0 GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE x0, w4 @@ -3267,7 +3283,6 @@ artMterpAsmInstructionStart = .L_op_nop ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE x0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - mvn x0, x0 GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE x0, w4 @@ -3292,9 +3307,8 @@ artMterpAsmInstructionStart = .L_op_nop lsr w3, wINST, #12 // w3<- B GET_VREG w0, w3 // w0<- vB ubfx w9, wINST, #8, #4 // w9<- A - mov w4, #0x80000000 // optional op; may set condition codes FETCH_ADVANCE_INST 1 // advance rPC, load rINST - add w0, w0, w4 // w0<- op, w0-w3 changed + eor w0, w0, #0x80000000 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST SET_VREG w0, w9 // vAA<- w0 GOTO_OPCODE ip // jump to next instruction @@ -3317,8 +3331,7 @@ artMterpAsmInstructionStart = .L_op_nop ubfx w4, wINST, #8, #4 // w4<- A GET_VREG_WIDE x0, w3 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - mov x1, #0x8000000000000000 - add x0, x0, x1 + eor x0, x0, #0x8000000000000000 GET_INST_OPCODE ip // extract opcode from wINST SET_VREG_WIDE x0, w4 GOTO_OPCODE ip // jump to next instruction @@ -3329,24 +3342,15 @@ artMterpAsmInstructionStart = .L_op_nop .balign 128 .L_op_int_to_long: /* 0x81 */ /* File: arm64/op_int_to_long.S */ -/* File: arm64/funopWider.S */ - /* - * Generic 32bit-to-64bit floating point unary operation. Provide an - * "instr" line that specifies an instruction that performs "x0 = op w0". - * - * For: int-to-double, float-to-double, float-to-long - */ - /* unop vA, vB */ + /* int-to-long vA, vB */ lsr w3, wINST, #12 // w3<- B ubfx w4, wINST, #8, #4 // w4<- A - GET_VREG w0, w3 + GET_VREG_S x0, w3 // x0<- sign_extend(fp[B]) FETCH_ADVANCE_INST 1 // advance rPC, load wINST - sxtw x0, w0 // d0<- op GET_INST_OPCODE ip // extract opcode from wINST - SET_VREG_WIDE x0, w4 // vA<- d0 + SET_VREG_WIDE x0, w4 // fp[A]<- x0 GOTO_OPCODE ip // jump to next instruction - /* ------------------------------ */ .balign 128 .L_op_int_to_float: /* 0x82 */ @@ -3396,22 +3400,21 @@ artMterpAsmInstructionStart = .L_op_nop .balign 128 .L_op_long_to_int: /* 0x84 */ /* File: arm64/op_long_to_int.S */ -/* File: arm64/funopNarrower.S */ - /* - * Generic 64bit-to-32bit floating point unary operation. Provide an - * "instr" line that specifies an instruction that performs "w0 = op x0". 
- * - * For: int-to-double, float-to-double, float-to-long - */ - /* unop vA, vB */ - lsr w3, wINST, #12 // w3<- B - ubfx w4, wINST, #8, #4 // w4<- A - GET_VREG_WIDE x0, w3 +/* we ignore the high word, making this equivalent to a 32-bit reg move */ +/* File: arm64/op_move.S */ + /* for move, move-object, long-to-int */ + /* op vA, vB */ + lsr w1, wINST, #12 // x1<- B from 15:12 + ubfx w0, wINST, #8, #4 // x0<- A from 11:8 FETCH_ADVANCE_INST 1 // advance rPC, load wINST - // d0<- op - GET_INST_OPCODE ip // extract opcode from wINST - SET_VREG w0, w4 // vA<- d0 - GOTO_OPCODE ip // jump to next instruction + GET_VREG w2, w1 // x2<- fp[B] + GET_INST_OPCODE ip // ip<- opcode from wINST + .if 0 + SET_VREG_OBJECT w2, w0 // fp[A]<- x2 + .else + SET_VREG w2, w0 // fp[A]<- x2 + .endif + GOTO_OPCODE ip // execute next instruction /* ------------------------------ */ @@ -3608,7 +3611,6 @@ artMterpAsmInstructionStart = .L_op_nop lsr w3, wINST, #12 // w3<- B GET_VREG w0, w3 // w0<- vB ubfx w9, wINST, #8, #4 // w9<- A - // optional op; may set condition codes FETCH_ADVANCE_INST 1 // advance rPC, load rINST sxtb w0, w0 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST @@ -3634,7 +3636,6 @@ artMterpAsmInstructionStart = .L_op_nop lsr w3, wINST, #12 // w3<- B GET_VREG w0, w3 // w0<- vB ubfx w9, wINST, #8, #4 // w9<- A - // optional op; may set condition codes FETCH_ADVANCE_INST 1 // advance rPC, load rINST uxth w0, w0 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST @@ -3660,7 +3661,6 @@ artMterpAsmInstructionStart = .L_op_nop lsr w3, wINST, #12 // w3<- B GET_VREG w0, w3 // w0<- vB ubfx w9, wINST, #8, #4 // w9<- A - // optional op; may set condition codes FETCH_ADVANCE_INST 1 // advance rPC, load rINST sxth w0, w0 // w0<- op, w0-w3 changed GET_INST_OPCODE ip // extract opcode from rINST @@ -6052,7 +6052,7 @@ artMterpAsmInstructionStart = .L_op_nop * shl-int/lit8, shr-int/lit8, ushr-int/lit8 */ /* binop/lit8 vAA, vBB, #+CC */ - FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC + FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC) lsr w9, wINST, #8 // w9<- AA and w2, w3, #255 // w2<- BB GET_VREG w0, w2 // w0<- vBB @@ -6088,7 +6088,7 @@ artMterpAsmInstructionStart = .L_op_nop * shl-int/lit8, shr-int/lit8, ushr-int/lit8 */ /* binop/lit8 vAA, vBB, #+CC */ - FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC + FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC) lsr w9, wINST, #8 // w9<- AA and w2, w3, #255 // w2<- BB GET_VREG w0, w2 // w0<- vBB @@ -6125,7 +6125,7 @@ artMterpAsmInstructionStart = .L_op_nop * shl-int/lit8, shr-int/lit8, ushr-int/lit8 */ /* binop/lit8 vAA, vBB, #+CC */ - FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC + FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC) lsr w9, wINST, #8 // w9<- AA and w2, w3, #255 // w2<- BB GET_VREG w0, w2 // w0<- vBB @@ -6161,7 +6161,7 @@ artMterpAsmInstructionStart = .L_op_nop * shl-int/lit8, shr-int/lit8, ushr-int/lit8 */ /* binop/lit8 vAA, vBB, #+CC */ - FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC + FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC) lsr w9, wINST, #8 // w9<- AA and w2, w3, #255 // w2<- BB GET_VREG w0, w2 // w0<- vBB @@ -6197,7 +6197,7 @@ artMterpAsmInstructionStart = .L_op_nop * shl-int/lit8, shr-int/lit8, ushr-int/lit8 */ /* binop/lit8 vAA, vBB, #+CC */ - FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC + FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC) lsr w9, wINST, #8 // w9<- AA and w2, w3, #255 // w2<- BB GET_VREG w0, w2 // w0<- vBB @@ -6233,7 +6233,7 
@@ artMterpAsmInstructionStart = .L_op_nop * shl-int/lit8, shr-int/lit8, ushr-int/lit8 */ /* binop/lit8 vAA, vBB, #+CC */ - FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC + FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC) lsr w9, wINST, #8 // w9<- AA and w2, w3, #255 // w2<- BB GET_VREG w0, w2 // w0<- vBB @@ -6269,7 +6269,7 @@ artMterpAsmInstructionStart = .L_op_nop * shl-int/lit8, shr-int/lit8, ushr-int/lit8 */ /* binop/lit8 vAA, vBB, #+CC */ - FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC + FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC) lsr w9, wINST, #8 // w9<- AA and w2, w3, #255 // w2<- BB GET_VREG w0, w2 // w0<- vBB @@ -6305,7 +6305,7 @@ artMterpAsmInstructionStart = .L_op_nop * shl-int/lit8, shr-int/lit8, ushr-int/lit8 */ /* binop/lit8 vAA, vBB, #+CC */ - FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC + FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC) lsr w9, wINST, #8 // w9<- AA and w2, w3, #255 // w2<- BB GET_VREG w0, w2 // w0<- vBB @@ -6341,7 +6341,7 @@ artMterpAsmInstructionStart = .L_op_nop * shl-int/lit8, shr-int/lit8, ushr-int/lit8 */ /* binop/lit8 vAA, vBB, #+CC */ - FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC + FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC) lsr w9, wINST, #8 // w9<- AA and w2, w3, #255 // w2<- BB GET_VREG w0, w2 // w0<- vBB @@ -6377,7 +6377,7 @@ artMterpAsmInstructionStart = .L_op_nop * shl-int/lit8, shr-int/lit8, ushr-int/lit8 */ /* binop/lit8 vAA, vBB, #+CC */ - FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC + FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC) lsr w9, wINST, #8 // w9<- AA and w2, w3, #255 // w2<- BB GET_VREG w0, w2 // w0<- vBB @@ -6413,7 +6413,7 @@ artMterpAsmInstructionStart = .L_op_nop * shl-int/lit8, shr-int/lit8, ushr-int/lit8 */ /* binop/lit8 vAA, vBB, #+CC */ - FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC + FETCH_S w3, 1 // w3<- ssssCCBB (sign-extended for CC) lsr w9, wINST, #8 // w9<- AA and w2, w3, #255 // w2<- BB GET_VREG w0, w2 // w0<- vBB @@ -6458,8 +6458,7 @@ artMterpAsmInstructionStart = .L_op_nop GET_VREG w3, w2 // w3<- object we're operating on ubfx w2, wINST, #8, #4 // w2<- A cbz w3, common_errNullObject // object was null - add x4, x3, x4 // create direct pointer - ldr x0, [x4] + ldr x0, [x3, x4] // x0<- obj.field FETCH_ADVANCE_INST 2 // advance rPC, load wINST SET_VREG_WIDE x0, w2 GET_INST_OPCODE ip // extract opcode from wINST @@ -6495,7 +6494,6 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- fp[B], the object pointer ubfx w2, wINST, #8, #4 // w2<- A - cmp w3, #0 // check object for null cbz w3, common_errNullObject // object was null GET_VREG w0, w2 // w0<- fp[A] FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -6515,8 +6513,7 @@ artMterpAsmInstructionStart = .L_op_nop cbz w2, common_errNullObject // object was null GET_VREG_WIDE x0, w0 // x0-< fp[A] FETCH_ADVANCE_INST 2 // advance rPC, load wINST - add x1, x2, x3 // create a direct pointer - str x0, [x1] + str x0, [x2, x3] // obj.field<- x0 GET_INST_OPCODE ip // extract opcode from wINST GOTO_OPCODE ip // jump to next instruction @@ -6597,7 +6594,6 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- fp[B], the object pointer ubfx w2, wINST, #8, #4 // w2<- A - cmp w3, #0 // check object for null cbz w3, common_errNullObject // object was null GET_VREG w0, w2 // w0<- fp[A] FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -6617,7 +6613,6 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 
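The iget-wide/iput-wide quick paths above replace the add-then-load (or add-then-store) pair with a single register-offset ldr/str, and the cmp before cbz is dropped because cbz already tests the register against zero. A rough C++ sketch of what the quickened wide field get amounts to (the function name and the abort stand-in are illustrative, not ART's API):

    #include <cstdint>
    #include <cstdlib>
    #include <cstring>

    // Sketch of a quickened iget-wide: null-check, then one 64-bit load at
    // object + byte offset ("ldr x0, [x3, x4]"), with no separate add.
    int64_t GetWideFieldQuick(const uint8_t* obj, uint32_t byte_offset) {
      if (obj == nullptr) {   // cbz w3, common_errNullObject
        std::abort();         // stands in for the NullPointerException path
      }
      int64_t value;
      std::memcpy(&value, obj + byte_offset, sizeof(value));
      return value;
    }
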
// w1<- field byte offset GET_VREG w3, w2 // w3<- fp[B], the object pointer ubfx w2, wINST, #8, #4 // w2<- A - cmp w3, #0 // check object for null cbz w3, common_errNullObject // object was null GET_VREG w0, w2 // w0<- fp[A] FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -6637,7 +6632,6 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- fp[B], the object pointer ubfx w2, wINST, #8, #4 // w2<- A - cmp w3, #0 // check object for null cbz w3, common_errNullObject // object was null GET_VREG w0, w2 // w0<- fp[A] FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -6657,7 +6651,6 @@ artMterpAsmInstructionStart = .L_op_nop FETCH w1, 1 // w1<- field byte offset GET_VREG w3, w2 // w3<- fp[B], the object pointer ubfx w2, wINST, #8, #4 // w2<- A - cmp w3, #0 // check object for null cbz w3, common_errNullObject // object was null GET_VREG w0, w2 // w0<- fp[A] FETCH_ADVANCE_INST 2 // advance rPC, load rINST @@ -6885,6 +6878,321 @@ artMterpAsmSisterStart: .global artMterpAsmSisterEnd artMterpAsmSisterEnd: +/* File: arm64/footer.S */ +/* + * =========================================================================== + * Common subroutines and data + * =========================================================================== + */ + + +/* + * We've detected a condition that will result in an exception, but the exception + * has not yet been thrown. Just bail out to the reference interpreter to deal with it. + * TUNING: for consistency, we may want to just go ahead and handle these here. + */ +common_errDivideByZero: + EXPORT_PC +#if MTERP_LOGGING + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + bl MterpLogDivideByZeroException +#endif + b MterpCommonFallback + +common_errArrayIndex: + EXPORT_PC +#if MTERP_LOGGING + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + bl MterpLogArrayIndexException +#endif + b MterpCommonFallback + +common_errNegativeArraySize: + EXPORT_PC +#if MTERP_LOGGING + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + bl MterpLogNegativeArraySizeException +#endif + b MterpCommonFallback + +common_errNoSuchMethod: + EXPORT_PC +#if MTERP_LOGGING + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + bl MterpLogNoSuchMethodException +#endif + b MterpCommonFallback + +common_errNullObject: + EXPORT_PC +#if MTERP_LOGGING + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + bl MterpLogNullObjectException +#endif + b MterpCommonFallback + +common_exceptionThrown: + EXPORT_PC +#if MTERP_LOGGING + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + bl MterpLogExceptionThrownException +#endif + b MterpCommonFallback + +MterpSuspendFallback: + EXPORT_PC +#if MTERP_LOGGING + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + ldr x2, [xSELF, #THREAD_FLAGS_OFFSET] + bl MterpLogSuspendFallback +#endif + b MterpCommonFallback + +/* + * If we're here, something is out of the ordinary. If there is a pending + * exception, handle it. Otherwise, roll back and retry with the reference + * interpreter. + */ +MterpPossibleException: + ldr x0, [xSELF, #THREAD_EXCEPTION_OFFSET] + cbz x0, MterpFallback // If not, fall back to reference interpreter. + /* intentional fallthrough - handle pending exception. */ +/* + * On return from a runtime helper routine, we've found a pending exception. + * Can we handle it here - or need to bail out to caller? + * + */ +MterpException: + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + bl MterpHandleException // (self, shadow_frame) + cbz w0, MterpExceptionReturn // no local catch, back to caller. 
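When MterpHandleException reports a local catch handler, the lines that follow rebuild xPC from the code item: the insns array begins at CODEITEM_INSNS_OFFSET and dex PCs are counted in 16-bit code units, hence the "lsl #1". A short C++ sketch of that pointer arithmetic (names are placeholders):

    #include <cstdint>

    // Mirrors "add xPC, x0, #CODEITEM_INSNS_OFFSET" followed by
    // "add xPC, xPC, x1, lsl #1"; insns_offset stands in for the constant.
    const uint16_t* ResumePcForCatch(const uint8_t* code_item,
                                     uint32_t dex_pc,
                                     uint32_t insns_offset) {
      const uint16_t* insns =
          reinterpret_cast<const uint16_t*>(code_item + insns_offset);
      return insns + dex_pc;  // dex PCs index 16-bit code units
    }
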
+ ldr x0, [xFP, #OFF_FP_CODE_ITEM] + ldr w1, [xFP, #OFF_FP_DEX_PC] + ldr xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] + add xPC, x0, #CODEITEM_INSNS_OFFSET + add xPC, xPC, x1, lsl #1 // generate new dex_pc_ptr + /* Do we need to switch interpreters? */ + bl MterpShouldSwitchInterpreters + cbnz w0, MterpFallback + /* resume execution at catch block */ + EXPORT_PC + FETCH_INST + GET_INST_OPCODE ip + GOTO_OPCODE ip + /* NOTE: no fallthrough */ +/* + * Common handling for branches with support for Jit profiling. + * On entry: + * wINST <= signed offset + * wPROFILE <= signed hotness countdown (expanded to 32 bits) + * condition bits <= set to establish sign of offset (use "NoFlags" entry if not) + * + * We have quite a few different cases for branch profiling, OSR detection and + * suspend check support here. + * + * Taken backward branches: + * If profiling active, do hotness countdown and report if we hit zero. + * If in osr check mode, see if our target is a compiled loop header entry and do OSR if so. + * Is there a pending suspend request? If so, suspend. + * + * Taken forward branches and not-taken backward branches: + * If in osr check mode, see if our target is a compiled loop header entry and do OSR if so. + * + * Our most common case is expected to be a taken backward branch with active jit profiling, + * but no full OSR check and no pending suspend request. + * Next most common case is not-taken branch with no full OSR check. + * + */ +MterpCommonTakenBranchNoFlags: + cmp wINST, #0 + b.gt .L_forward_branch // don't add forward branches to hotness + tbnz wPROFILE, #31, .L_no_count_backwards // go if negative + subs wPROFILE, wPROFILE, #1 // countdown + b.eq .L_add_batch // counted down to zero - report +.L_resume_backward_branch: + ldr lr, [xSELF, #THREAD_FLAGS_OFFSET] + add w2, wINST, wINST // w2<- byte offset + FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST + REFRESH_IBASE + ands lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST) + b.ne .L_suspend_request_pending + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction + +.L_suspend_request_pending: + EXPORT_PC + mov x0, xSELF + bl MterpSuspendCheck // (self) + cbnz x0, MterpFallback + REFRESH_IBASE // might have changed during suspend + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction + +.L_no_count_backwards: + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? + b.ne .L_resume_backward_branch + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + mov x2, xINST + EXPORT_PC + bl MterpMaybeDoOnStackReplacement // (self, shadow_frame, offset) + cbnz x0, MterpOnStackReplacement + b .L_resume_backward_branch + +.L_forward_branch: + cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? 
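The comment above lays out the branch-profiling policy; a rough C++ sketch of the same decisions follows (the sentinel value and names are assumptions, and the suspend check taken on backward branches is omitted):

    #include <cstdint>

    // JIT_CHECK_OSR is a special negative sentinel in the hotness countdown;
    // its exact value is assumed here.
    enum BranchAction { kJustTakeBranch, kReportHotnessBatch, kMaybeDoOsr };

    BranchAction OnTakenBranch(int32_t branch_offset,
                               int32_t& hotness_countdown,
                               int32_t jit_check_osr_sentinel) {
      if (branch_offset > 0) {
        // Forward branches are not counted toward hotness.
        return hotness_countdown == jit_check_osr_sentinel ? kMaybeDoOsr
                                                           : kJustTakeBranch;
      }
      if (hotness_countdown < 0) {
        // Negative: profiling disabled, or we are in OSR-check mode.
        return hotness_countdown == jit_check_osr_sentinel ? kMaybeDoOsr
                                                           : kJustTakeBranch;
      }
      if (--hotness_countdown == 0) {
        return kReportHotnessBatch;  // .L_add_batch: report counts to the JIT
      }
      return kJustTakeBranch;        // then fall through to the suspend check
    }
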
+ b.eq .L_check_osr_forward +.L_resume_forward_branch: + add w2, wINST, wINST // w2<- byte offset + FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction + +.L_check_osr_forward: + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + mov x2, xINST + EXPORT_PC + bl MterpMaybeDoOnStackReplacement // (self, shadow_frame, offset) + cbnz x0, MterpOnStackReplacement + b .L_resume_forward_branch + +.L_add_batch: + add x1, xFP, #OFF_FP_SHADOWFRAME + strh wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET] + ldr x0, [xFP, #OFF_FP_METHOD] + mov x2, xSELF + bl MterpAddHotnessBatch // (method, shadow_frame, self) + mov wPROFILE, w0 // restore new hotness countdown to wPROFILE + b .L_no_count_backwards + +/* + * Entered from the conditional branch handlers when OSR check request active on + * not-taken path. All Dalvik not-taken conditional branch offsets are 2. + */ +.L_check_not_taken_osr: + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + mov x2, #2 + EXPORT_PC + bl MterpMaybeDoOnStackReplacement // (self, shadow_frame, offset) + cbnz x0, MterpOnStackReplacement + FETCH_ADVANCE_INST 2 + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction + + +/* + * Check for suspend check request. Assumes wINST already loaded, xPC advanced and + * still needs to get the opcode and branch to it, and flags are in lr. + */ +MterpCheckSuspendAndContinue: + ldr xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh xIBASE + ands w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST) + b.ne check1 + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction +check1: + EXPORT_PC + mov x0, xSELF + bl MterpSuspendCheck // (self) + cbnz x0, MterpFallback // Something in the environment changed, switch interpreters + GET_INST_OPCODE ip // extract opcode from wINST + GOTO_OPCODE ip // jump to next instruction + +/* + * On-stack replacement has happened, and now we've returned from the compiled method. + */ +MterpOnStackReplacement: +#if MTERP_LOGGING + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + sxtw x2, wINST + bl MterpLogOSR +#endif + mov x0, #1 // Signal normal return + b MterpDone + +/* + * Bail out to reference interpreter. + */ +MterpFallback: + EXPORT_PC +#if MTERP_LOGGING + mov x0, xSELF + add x1, xFP, #OFF_FP_SHADOWFRAME + bl MterpLogFallback +#endif +MterpCommonFallback: + mov x0, #0 // signal retry with reference interpreter. + b MterpDone + +/* + * We pushed some registers on the stack in ExecuteMterpImpl, then saved + * SP and LR. Here we restore SP, restore the registers, and then restore + * LR to PC. + * + * On entry: + * uint32_t* xFP (should still be live, pointer to base of vregs) + */ +MterpExceptionReturn: + mov x0, #1 // signal return to caller. + b MterpDone +MterpReturn: + ldr x2, [xFP, #OFF_FP_RESULT_REGISTER] + ldr lr, [xSELF, #THREAD_FLAGS_OFFSET] + str x0, [x2] + mov x0, xSELF + ands lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST) + b.eq check2 + bl MterpSuspendCheck // (self) +check2: + mov x0, #1 // signal return to caller. +MterpDone: +/* + * At this point, we expect wPROFILE to be non-zero. If negative, hotness is disabled or we're + * checking for OSR. If greater than zero, we might have unreported hotness to register + * (the difference between the ending wPROFILE and the cached hotness counter). 
wPROFILE + * should only reach zero immediately after a hotness decrement, and is then reset to either + * a negative special state or the new non-zero countdown value. + */ + cmp wPROFILE, #0 + bgt MterpProfileActive // if > 0, we may have some counts to report. + ldp fp, lr, [sp, #64] + ldp xPC, xFP, [sp, #48] + ldp xSELF, xINST, [sp, #32] + ldp xIBASE, xREFS, [sp, #16] + ldp xPROFILE, x27, [sp], #80 + ret + +MterpProfileActive: + mov xINST, x0 // stash return value + /* Report cached hotness counts */ + ldr x0, [xFP, #OFF_FP_METHOD] + add x1, xFP, #OFF_FP_SHADOWFRAME + mov x2, xSELF + strh wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET] + bl MterpAddHotnessBatch // (method, shadow_frame, self) + mov x0, xINST // restore return value + ldp fp, lr, [sp, #64] + ldp xPC, xFP, [sp, #48] + ldp xSELF, xINST, [sp, #32] + ldp xIBASE, xREFS, [sp, #16] + ldp xPROFILE, x27, [sp], #80 + ret + + .cfi_endproc + .size ExecuteMterpImpl, .-ExecuteMterpImpl + + .global artMterpAsmAltInstructionStart .type artMterpAsmAltInstructionStart, %function @@ -11247,318 +11555,3 @@ artMterpAsmAltInstructionStart = .L_ALT_op_nop .size artMterpAsmAltInstructionStart, .-artMterpAsmAltInstructionStart .global artMterpAsmAltInstructionEnd artMterpAsmAltInstructionEnd: -/* File: arm64/footer.S */ -/* - * =========================================================================== - * Common subroutines and data - * =========================================================================== - */ - - -/* - * We've detected a condition that will result in an exception, but the exception - * has not yet been thrown. Just bail out to the reference interpreter to deal with it. - * TUNING: for consistency, we may want to just go ahead and handle these here. - */ -common_errDivideByZero: - EXPORT_PC -#if MTERP_LOGGING - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - bl MterpLogDivideByZeroException -#endif - b MterpCommonFallback - -common_errArrayIndex: - EXPORT_PC -#if MTERP_LOGGING - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - bl MterpLogArrayIndexException -#endif - b MterpCommonFallback - -common_errNegativeArraySize: - EXPORT_PC -#if MTERP_LOGGING - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - bl MterpLogNegativeArraySizeException -#endif - b MterpCommonFallback - -common_errNoSuchMethod: - EXPORT_PC -#if MTERP_LOGGING - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - bl MterpLogNoSuchMethodException -#endif - b MterpCommonFallback - -common_errNullObject: - EXPORT_PC -#if MTERP_LOGGING - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - bl MterpLogNullObjectException -#endif - b MterpCommonFallback - -common_exceptionThrown: - EXPORT_PC -#if MTERP_LOGGING - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - bl MterpLogExceptionThrownException -#endif - b MterpCommonFallback - -MterpSuspendFallback: - EXPORT_PC -#if MTERP_LOGGING - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - ldr x2, [xSELF, #THREAD_FLAGS_OFFSET] - bl MterpLogSuspendFallback -#endif - b MterpCommonFallback - -/* - * If we're here, something is out of the ordinary. If there is a pending - * exception, handle it. Otherwise, roll back and retry with the reference - * interpreter. - */ -MterpPossibleException: - ldr x0, [xSELF, #THREAD_EXCEPTION_OFFSET] - cbz x0, MterpFallback // If not, fall back to reference interpreter. - /* intentional fallthrough - handle pending exception. */ -/* - * On return from a runtime helper routine, we've found a pending exception. - * Can we handle it here - or need to bail out to caller? 
- * - */ -MterpException: - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - bl MterpHandleException // (self, shadow_frame) - cbz w0, MterpExceptionReturn // no local catch, back to caller. - ldr x0, [xFP, #OFF_FP_CODE_ITEM] - ldr w1, [xFP, #OFF_FP_DEX_PC] - ldr xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] - add xPC, x0, #CODEITEM_INSNS_OFFSET - add xPC, xPC, x1, lsl #1 // generate new dex_pc_ptr - /* Do we need to switch interpreters? */ - bl MterpShouldSwitchInterpreters - cbnz w0, MterpFallback - /* resume execution at catch block */ - EXPORT_PC - FETCH_INST - GET_INST_OPCODE ip - GOTO_OPCODE ip - /* NOTE: no fallthrough */ -/* - * Common handling for branches with support for Jit profiling. - * On entry: - * wINST <= signed offset - * wPROFILE <= signed hotness countdown (expanded to 32 bits) - * condition bits <= set to establish sign of offset (use "NoFlags" entry if not) - * - * We have quite a few different cases for branch profiling, OSR detection and - * suspend check support here. - * - * Taken backward branches: - * If profiling active, do hotness countdown and report if we hit zero. - * If in osr check mode, see if our target is a compiled loop header entry and do OSR if so. - * Is there a pending suspend request? If so, suspend. - * - * Taken forward branches and not-taken backward branches: - * If in osr check mode, see if our target is a compiled loop header entry and do OSR if so. - * - * Our most common case is expected to be a taken backward branch with active jit profiling, - * but no full OSR check and no pending suspend request. - * Next most common case is not-taken branch with no full OSR check. - * - */ -MterpCommonTakenBranchNoFlags: - cmp wINST, #0 - b.gt .L_forward_branch // don't add forward branches to hotness - tbnz wPROFILE, #31, .L_no_count_backwards // go if negative - subs wPROFILE, wPROFILE, #1 // countdown - b.eq .L_add_batch // counted down to zero - report -.L_resume_backward_branch: - ldr lr, [xSELF, #THREAD_FLAGS_OFFSET] - add w2, wINST, wINST // w2<- byte offset - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - REFRESH_IBASE - ands lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST) - b.ne .L_suspend_request_pending - GET_INST_OPCODE ip // extract opcode from wINST - GOTO_OPCODE ip // jump to next instruction - -.L_suspend_request_pending: - EXPORT_PC - mov x0, xSELF - bl MterpSuspendCheck // (self) - cbnz x0, MterpFallback - REFRESH_IBASE // might have changed during suspend - GET_INST_OPCODE ip // extract opcode from wINST - GOTO_OPCODE ip // jump to next instruction - -.L_no_count_backwards: - cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? - b.ne .L_resume_backward_branch - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - mov x2, xINST - EXPORT_PC - bl MterpMaybeDoOnStackReplacement // (self, shadow_frame, offset) - cbnz x0, MterpOnStackReplacement - b .L_resume_backward_branch - -.L_forward_branch: - cmp wPROFILE, #JIT_CHECK_OSR // possible OSR re-entry? 
- b.eq .L_check_osr_forward -.L_resume_forward_branch: - add w2, wINST, wINST // w2<- byte offset - FETCH_ADVANCE_INST_RB w2 // update rPC, load wINST - GET_INST_OPCODE ip // extract opcode from wINST - GOTO_OPCODE ip // jump to next instruction - -.L_check_osr_forward: - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - mov x2, xINST - EXPORT_PC - bl MterpMaybeDoOnStackReplacement // (self, shadow_frame, offset) - cbnz x0, MterpOnStackReplacement - b .L_resume_forward_branch - -.L_add_batch: - add x1, xFP, #OFF_FP_SHADOWFRAME - strh wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET] - ldr x0, [xFP, #OFF_FP_METHOD] - mov x2, xSELF - bl MterpAddHotnessBatch // (method, shadow_frame, self) - mov wPROFILE, w0 // restore new hotness countdown to wPROFILE - b .L_no_count_backwards - -/* - * Entered from the conditional branch handlers when OSR check request active on - * not-taken path. All Dalvik not-taken conditional branch offsets are 2. - */ -.L_check_not_taken_osr: - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - mov x2, #2 - EXPORT_PC - bl MterpMaybeDoOnStackReplacement // (self, shadow_frame, offset) - cbnz x0, MterpOnStackReplacement - FETCH_ADVANCE_INST 2 - GET_INST_OPCODE ip // extract opcode from wINST - GOTO_OPCODE ip // jump to next instruction - - -/* - * Check for suspend check request. Assumes wINST already loaded, xPC advanced and - * still needs to get the opcode and branch to it, and flags are in lr. - */ -MterpCheckSuspendAndContinue: - ldr xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET] // refresh xIBASE - ands w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST) - b.ne check1 - GET_INST_OPCODE ip // extract opcode from wINST - GOTO_OPCODE ip // jump to next instruction -check1: - EXPORT_PC - mov x0, xSELF - bl MterpSuspendCheck // (self) - cbnz x0, MterpFallback // Something in the environment changed, switch interpreters - GET_INST_OPCODE ip // extract opcode from wINST - GOTO_OPCODE ip // jump to next instruction - -/* - * On-stack replacement has happened, and now we've returned from the compiled method. - */ -MterpOnStackReplacement: -#if MTERP_LOGGING - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - sxtw x2, wINST - bl MterpLogOSR -#endif - mov x0, #1 // Signal normal return - b MterpDone - -/* - * Bail out to reference interpreter. - */ -MterpFallback: - EXPORT_PC -#if MTERP_LOGGING - mov x0, xSELF - add x1, xFP, #OFF_FP_SHADOWFRAME - bl MterpLogFallback -#endif -MterpCommonFallback: - mov x0, #0 // signal retry with reference interpreter. - b MterpDone - -/* - * We pushed some registers on the stack in ExecuteMterpImpl, then saved - * SP and LR. Here we restore SP, restore the registers, and then restore - * LR to PC. - * - * On entry: - * uint32_t* xFP (should still be live, pointer to base of vregs) - */ -MterpExceptionReturn: - mov x0, #1 // signal return to caller. - b MterpDone -MterpReturn: - ldr x2, [xFP, #OFF_FP_RESULT_REGISTER] - ldr lr, [xSELF, #THREAD_FLAGS_OFFSET] - str x0, [x2] - mov x0, xSELF - ands lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST) - b.eq check2 - bl MterpSuspendCheck // (self) -check2: - mov x0, #1 // signal return to caller. -MterpDone: -/* - * At this point, we expect wPROFILE to be non-zero. If negative, hotness is disabled or we're - * checking for OSR. If greater than zero, we might have unreported hotness to register - * (the difference between the ending wPROFILE and the cached hotness counter). 
wPROFILE - * should only reach zero immediately after a hotness decrement, and is then reset to either - * a negative special state or the new non-zero countdown value. - */ - cmp wPROFILE, #0 - bgt MterpProfileActive // if > 0, we may have some counts to report. - ldp fp, lr, [sp, #64] - ldp xPC, xFP, [sp, #48] - ldp xSELF, xINST, [sp, #32] - ldp xIBASE, xREFS, [sp, #16] - ldp xPROFILE, x27, [sp], #80 - ret - -MterpProfileActive: - mov xINST, x0 // stash return value - /* Report cached hotness counts */ - ldr x0, [xFP, #OFF_FP_METHOD] - add x1, xFP, #OFF_FP_SHADOWFRAME - mov x2, xSELF - strh wPROFILE, [x1, #SHADOWFRAME_HOTNESS_COUNTDOWN_OFFSET] - bl MterpAddHotnessBatch // (method, shadow_frame, self) - mov x0, xINST // restore return value - ldp fp, lr, [sp, #64] - ldp xPC, xFP, [sp, #48] - ldp xSELF, xINST, [sp, #32] - ldp xIBASE, xREFS, [sp, #16] - ldp xPROFILE, x27, [sp], #80 - ret - - .cfi_endproc - .size ExecuteMterpImpl, .-ExecuteMterpImpl - - diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc index 5039d2de07..aa606a26d3 100644 --- a/runtime/jit/offline_profiling_info.cc +++ b/runtime/jit/offline_profiling_info.cc @@ -637,7 +637,7 @@ std::string ProfileCompilationInfo::DumpInfo(const std::vector<const DexFile*>* os << "\n\tclasses: "; for (const auto class_it : dex_data.class_set) { if (dex_file != nullptr) { - os << "\n\t\t" << PrettyType(class_it, *dex_file); + os << "\n\t\t" << dex_file->GetClassDescriptor(dex_file->GetClassDef(class_it)); } else { os << class_it << ","; } diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h index 32ed337cca..3ba9e1aaab 100644 --- a/runtime/mirror/class.h +++ b/runtime/mirror/class.h @@ -471,7 +471,7 @@ class MANAGED Class FINAL : public Object { template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier> size_t GetComponentSize() SHARED_REQUIRES(Locks::mutator_lock_) { - return 1U << GetComponentSizeShift(); + return 1U << GetComponentSizeShift<kReadBarrierOption>(); } template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier> diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h index 3baa12e40b..e8ad5fa1c8 100644 --- a/runtime/mirror/reference.h +++ b/runtime/mirror/reference.h @@ -76,8 +76,9 @@ class MANAGED Reference : public Object { SetFieldObjectVolatile<kTransactionActive>(ReferentOffset(), nullptr); } + template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier> Reference* GetPendingNext() SHARED_REQUIRES(Locks::mutator_lock_) { - return GetFieldObject<Reference>(PendingNextOffset()); + return GetFieldObject<Reference, kDefaultVerifyFlags, kReadBarrierOption>(PendingNextOffset()); } void SetPendingNext(Reference* pending_next) @@ -102,7 +103,7 @@ class MANAGED Reference : public Object { // removed from the list after having determined the reference is not ready // to be enqueued on a java ReferenceQueue. 
bool IsUnprocessed() SHARED_REQUIRES(Locks::mutator_lock_) { - return GetPendingNext() == nullptr; + return GetPendingNext<kWithoutReadBarrier>() == nullptr; } template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier> diff --git a/runtime/oat.h b/runtime/oat.h index 9b8f5452c4..2c5c3e636f 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,7 +32,7 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' }; - static constexpr uint8_t kOatVersion[] = { '0', '8', '4', '\0' }; + static constexpr uint8_t kOatVersion[] = { '0', '8', '5', '\0' }; static constexpr const char* kImageLocationKey = "image-location"; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; diff --git a/runtime/thread.cc b/runtime/thread.cc index 50f76da3df..9cce1711bd 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -2604,8 +2604,6 @@ void Thread::DumpThreadOffset(std::ostream& os, uint32_t offset) { QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg27) QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg28) QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg29) - QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg30) - QUICK_ENTRY_POINT_INFO(pReadBarrierMarkReg31) QUICK_ENTRY_POINT_INFO(pReadBarrierSlow) QUICK_ENTRY_POINT_INFO(pReadBarrierForRootSlow) #undef QUICK_ENTRY_POINT_INFO diff --git a/test/496-checker-inlining-and-class-loader/src/Main.java b/test/496-checker-inlining-and-class-loader/src/Main.java index 8de6318664..78e8a40399 100644 --- a/test/496-checker-inlining-and-class-loader/src/Main.java +++ b/test/496-checker-inlining-and-class-loader/src/Main.java @@ -107,7 +107,8 @@ class LoadedByMyClassLoader { /* Load and initialize FirstSeenByMyClassLoader */ /// CHECK: LoadClass gen_clinit_check:true /* Load and initialize System */ - /// CHECK-NEXT: LoadClass gen_clinit_check:true + // There may be MipsComputeBaseMethodAddress here. + /// CHECK: LoadClass gen_clinit_check:true /// CHECK-NEXT: StaticFieldGet // There may be HArmDexCacheArraysBase or HX86ComputeBaseMethodAddress here. 
/// CHECK: LoadString diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java index 09a77ed285..2232ff43d2 100644 --- a/test/552-checker-sharpening/src/Main.java +++ b/test/552-checker-sharpening/src/Main.java @@ -51,6 +51,10 @@ public class Main { /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after) /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK-START-MIPS: int Main.testSimple(int) sharpening (after) + /// CHECK-NOT: MipsDexCacheArraysBase + /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK-START-X86: int Main.testSimple(int) sharpening (after) /// CHECK-NOT: X86ComputeBaseMethodAddress /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative @@ -62,6 +66,10 @@ public class Main { /// CHECK: ArmDexCacheArraysBase /// CHECK-NOT: ArmDexCacheArraysBase + /// CHECK-START-MIPS: int Main.testSimple(int) dex_cache_array_fixups_mips (after) + /// CHECK: MipsDexCacheArraysBase + /// CHECK-NOT: MipsDexCacheArraysBase + /// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (after) /// CHECK: X86ComputeBaseMethodAddress /// CHECK-NOT: X86ComputeBaseMethodAddress @@ -83,6 +91,11 @@ public class Main { /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) sharpening (after) + /// CHECK-NOT: MipsDexCacheArraysBase + /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening (after) /// CHECK-NOT: X86ComputeBaseMethodAddress /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative @@ -100,6 +113,14 @@ public class Main { /// CHECK: ArmDexCacheArraysBase /// CHECK-NEXT: If + /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) dex_cache_array_fixups_mips (after) + /// CHECK: MipsDexCacheArraysBase + /// CHECK-NOT: MipsDexCacheArraysBase + + /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) dex_cache_array_fixups_mips (after) + /// CHECK: MipsDexCacheArraysBase + /// CHECK-NEXT: If + /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after) /// CHECK: X86ComputeBaseMethodAddress /// CHECK-NOT: X86ComputeBaseMethodAddress @@ -110,7 +131,7 @@ public class Main { public static int testDiamond(boolean negate, int x) { // These calls should use PC-relative dex cache array loads to retrieve the target method. - // PC-relative bases used by X86 and ARM should be pulled before the If. + // PC-relative bases used by ARM, MIPS and X86 should be pulled before the If. 
if (negate) { return $noinline$foo(-x); } else { @@ -154,8 +175,26 @@ public class Main { /// CHECK: begin_block /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (before) + /// CHECK-NOT: MipsDexCacheArraysBase + + /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after) + /// CHECK: MipsDexCacheArraysBase + /// CHECK-NOT: MipsDexCacheArraysBase + + /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after) + /// CHECK: InvokeStaticOrDirect + /// CHECK-NOT: InvokeStaticOrDirect + + /// CHECK-START-MIPS: int Main.testLoop(int[], int) dex_cache_array_fixups_mips (after) + /// CHECK: ArrayLength + /// CHECK-NEXT: MipsDexCacheArraysBase + /// CHECK-NEXT: Goto + /// CHECK: begin_block + /// CHECK: InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative + public static int testLoop(int[] array, int x) { - // PC-relative bases used by X86 and ARM should be pulled before the loop. + // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop. for (int i : array) { x += $noinline$foo(i); } @@ -182,8 +221,18 @@ public class Main { /// CHECK-NEXT: ArmDexCacheArraysBase /// CHECK-NEXT: Goto + /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_mips (before) + /// CHECK-NOT: MipsDexCacheArraysBase + + /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_mips (after) + /// CHECK: If + /// CHECK: begin_block + /// CHECK: ArrayLength + /// CHECK-NEXT: MipsDexCacheArraysBase + /// CHECK-NEXT: Goto + public static int testLoopWithDiamond(int[] array, boolean negate, int x) { - // PC-relative bases used by X86 and ARM should be pulled before the loop + // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop // but not outside the if. if (array != null) { for (int i : array) { @@ -220,6 +269,11 @@ public class Main { // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. /// CHECK: LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} + /// CHECK-START-MIPS: java.lang.String Main.$noinline$getBootImageString() sharpening (after) + // Note: load kind depends on PIC/non-PIC + // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. + /// CHECK: LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} + public static String $noinline$getBootImageString() { // Prevent inlining to avoid the string comparison being optimized away. if (doThrow) { throw new Error(); } @@ -250,6 +304,13 @@ public class Main { /// CHECK-START-ARM64: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after) /// CHECK: LoadString load_kind:DexCachePcRelative + /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after) + /// CHECK: LoadString load_kind:DexCachePcRelative + + /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() dex_cache_array_fixups_mips (after) + /// CHECK-DAG: MipsDexCacheArraysBase + /// CHECK-DAG: LoadString load_kind:DexCachePcRelative + public static String $noinline$getNonBootImageString() { // Prevent inlining to avoid the string comparison being optimized away. if (doThrow) { throw new Error(); } @@ -280,6 +341,11 @@ public class Main { // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. 
/// CHECK: LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String + /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getStringClass() sharpening (after) + // Note: load kind depends on PIC/non-PIC + // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress. + /// CHECK: LoadClass load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}} class_name:java.lang.String + public static Class<?> $noinline$getStringClass() { // Prevent inlining to avoid the string comparison being optimized away. if (doThrow) { throw new Error(); } @@ -310,6 +376,13 @@ public class Main { /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getOtherClass() sharpening (after) /// CHECK: LoadClass load_kind:DexCachePcRelative class_name:Other + /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getOtherClass() sharpening (after) + /// CHECK: LoadClass load_kind:DexCachePcRelative class_name:Other + + /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getOtherClass() dex_cache_array_fixups_mips (after) + /// CHECK-DAG: MipsDexCacheArraysBase + /// CHECK-DAG: LoadClass load_kind:DexCachePcRelative class_name:Other + public static Class<?> $noinline$getOtherClass() { // Prevent inlining to avoid the string comparison being optimized away. if (doThrow) { throw new Error(); } diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt index 996f2f868f..bf8d12b9c6 100644 --- a/tools/libcore_failures.txt +++ b/tools/libcore_failures.txt @@ -260,5 +260,12 @@ bug: 30107038, modes: [device], names: ["org.apache.harmony.tests.java.lang.ProcessTest#test_destroyForcibly"] +}, +{ + description: "Flaky failure, possibly caused by a kernel bug accessing /proc/", + result: EXEC_FAILED, + bug: 27464570, + modes: [device], + names: ["libcore.java.lang.ProcessBuilderTest#testRedirectInherit"] } ] |
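The MIPS checker expectations added above mirror the existing ARM and X86 ones: the dex-cache-arrays base is loop-invariant, so it should be materialized once and hoisted ahead of the loop or If that uses it rather than recomputed at every call site. A purely illustrative C++ analogy of that hoisting, not tied to any ART API:

    #include <cstdint>

    // The "base" is computed once, up front, and every iteration reuses it,
    // which is what hoisting MipsDexCacheArraysBase achieves in generated code.
    int64_t SumThroughTable(const int32_t* table_base, const int32_t* array,
                            int32_t length) {
      const int32_t* table = table_base + 8;  // invariant base, hoisted
      int64_t sum = 0;
      for (int32_t i = 0; i < length; ++i) {
        sum += table[array[i] & 0x7];         // reuses the same base
      }
      return sum;
    }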