diff options
| -rw-r--r-- | build/Android.gtest.mk | 12 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator.cc | 6 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator.h | 4 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 33 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_arm.h | 7 | ||||
| -rw-r--r-- | compiler/optimizing/codegen_test.cc | 5 | ||||
| -rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 15 | ||||
| -rw-r--r-- | disassembler/disassembler_arm.cc | 454 |
8 files changed, 276 insertions, 260 deletions
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk index bd74ada9a6..5aa264ca02 100644 --- a/build/Android.gtest.mk +++ b/build/Android.gtest.mk @@ -66,8 +66,16 @@ ART_GTEST_elf_writer_test_TARGET_DEPS := $(TARGET_CORE_IMAGE_default_no-pic_64) ART_GTEST_proxy_test_HOST_DEPS := $(HOST_CORE_IMAGE_default_no-pic_64) $(HOST_CORE_IMAGE_default_no-pic_32) # The imgdiag test has dependencies on core.oat since it needs to load it during the test. -ART_GTEST_imgdiag_test_HOST_DEPS := $(HOST_CORE_IMAGE_default_no-pic_64) $(HOST_CORE_IMAGE_default_no-pic_32) -ART_GTEST_imgdiag_test_TARGET_DEPS := $(TARGET_CORE_IMAGE_default_no-pic_64) $(TARGET_CORE_IMAGE_default_no-pic_32) +# For the host, also add the installed tool (in the base size, that should suffice). For the +# target, just the module is fine, the sync will happen late enough. +ART_GTEST_imgdiag_test_HOST_DEPS := \ + $(HOST_CORE_IMAGE_default_no-pic_64) \ + $(HOST_CORE_IMAGE_default_no-pic_32) \ + $(HOST_OUT_EXECUTABLES)/imgdiagd +ART_GTEST_imgdiag_test_TARGET_DEPS := \ + $(TARGET_CORE_IMAGE_default_no-pic_64) \ + $(TARGET_CORE_IMAGE_default_no-pic_32) \ + imgdiagd # The path for which all the source files are relative, not actually the current directory. LOCAL_PATH := art diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 91426f347b..4d8154e6a0 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -327,11 +327,13 @@ bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) con CodeGenerator* CodeGenerator::Create(ArenaAllocator* allocator, HGraph* graph, - InstructionSet instruction_set) { + InstructionSet instruction_set, + const InstructionSetFeatures& isa_features) { switch (instruction_set) { case kArm: case kThumb2: { - return new (allocator) arm::CodeGeneratorARM(graph); + return new (allocator) arm::CodeGeneratorARM(graph, + isa_features.AsArmInstructionSetFeatures()); } case kArm64: { return new (allocator) arm64::CodeGeneratorARM64(graph); diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 2e7eca2ead..4205ebebf9 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -18,6 +18,7 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_ #include "arch/instruction_set.h" +#include "arch/instruction_set_features.h" #include "base/bit_field.h" #include "globals.h" #include "locations.h" @@ -84,7 +85,8 @@ class CodeGenerator : public ArenaObject<kArenaAllocMisc> { void CompileOptimized(CodeAllocator* allocator); static CodeGenerator* Create(ArenaAllocator* allocator, HGraph* graph, - InstructionSet instruction_set); + InstructionSet instruction_set, + const InstructionSetFeatures& isa_features); HGraph* GetGraph() const { return graph_; } diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 8c107f302e..3b3fb64763 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -16,6 +16,7 @@ #include "code_generator_arm.h" +#include "arch/arm/instruction_set_features_arm.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" #include "mirror/array-inl.h" @@ -372,13 +373,15 @@ size_t CodeGeneratorARM::RestoreCoreRegister(size_t stack_index, uint32_t reg_id return kArmWordSize; } -CodeGeneratorARM::CodeGeneratorARM(HGraph* graph) +CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, + const ArmInstructionSetFeatures* isa_features) : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters, kNumberOfRegisterPairs), block_labels_(graph->GetArena(), 0), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - assembler_(true) {} + assembler_(true), + isa_features_(isa_features) {} size_t CodeGeneratorARM::FrameEntrySpillSize() const { return kNumberOfPushedRegistersAtEntry * kArmWordSize; @@ -2615,16 +2618,18 @@ void LocationsBuilderARM::HandleFieldSet(HInstruction* instruction, const FieldI locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - bool is_volatile = field_info.IsVolatile(); + Primitive::Type field_type = field_info.GetFieldType(); bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble; - + bool generate_volatile = field_info.IsVolatile() + && is_wide + && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); // Temporary registers for the write barrier. // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark. if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - } else if (is_volatile && is_wide) { + } else if (generate_volatile) { // Arm encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. @@ -2651,6 +2656,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, Location value = locations->InAt(1); bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); Primitive::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); @@ -2684,10 +2690,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, } case Primitive::kPrimLong: { - if (is_volatile) { - // TODO: We could use ldrd and strd that are atomic with Large Physical Address Extension - // support. This info is stored in the compiler driver (HasAtomicLdrdAndStrd) and we should - // pass it around to be able to optimize. + if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicStore(base, offset, value.AsRegisterPairLow<Register>(), value.AsRegisterPairHigh<Register>(), @@ -2706,7 +2709,7 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction, case Primitive::kPrimDouble: { DRegister value_reg = FromLowSToD(value.AsFpuRegisterPairLow<SRegister>()); - if (is_volatile) { + if (is_volatile && !atomic_ldrd_strd) { Register value_reg_lo = locations->GetTemp(0).AsRegister<Register>(); Register value_reg_hi = locations->GetTemp(1).AsRegister<Register>(); @@ -2740,7 +2743,10 @@ void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldI locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimDouble)) { + bool generate_volatile = field_info.IsVolatile() + && (field_info.GetFieldType() == Primitive::kPrimDouble) + && !codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); + if (generate_volatile) { // Arm encoding have some additional constraints for ldrexd/strexd: // - registers need to be consecutive // - the first register should be even but not R14. @@ -2760,6 +2766,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, Register base = locations->InAt(0).AsRegister<Register>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); + bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures()->HasAtomicLdrdAndStrd(); Primitive::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); @@ -2791,7 +2798,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, } case Primitive::kPrimLong: { - if (is_volatile) { + if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicLoad(base, offset, out.AsRegisterPairLow<Register>(), out.AsRegisterPairHigh<Register>()); @@ -2808,7 +2815,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction, case Primitive::kPrimDouble: { DRegister out_reg = FromLowSToD(out.AsFpuRegisterPairLow<SRegister>()); - if (is_volatile) { + if (is_volatile && !atomic_ldrd_strd) { Register lo = locations->GetTemp(0).AsRegister<Register>(); Register hi = locations->GetTemp(1).AsRegister<Register>(); GenerateWideAtomicLoad(base, offset, lo, hi); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index b86670d939..40f4edc4eb 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -159,7 +159,7 @@ class InstructionCodeGeneratorARM : public HGraphVisitor { class CodeGeneratorARM : public CodeGenerator { public: - explicit CodeGeneratorARM(HGraph* graph); + CodeGeneratorARM(HGraph* graph, const ArmInstructionSetFeatures* isa_features); virtual ~CodeGeneratorARM() {} void GenerateFrameEntry() OVERRIDE; @@ -233,6 +233,10 @@ class CodeGeneratorARM : public CodeGenerator { block_labels_.SetSize(GetGraph()->GetBlocks().Size()); } + const ArmInstructionSetFeatures* GetInstructionSetFeatures() const { + return isa_features_; + } + private: // Labels for each block that will be compiled. GrowableArray<Label> block_labels_; @@ -240,6 +244,7 @@ class CodeGeneratorARM : public CodeGenerator { InstructionCodeGeneratorARM instruction_visitor_; ParallelMoveResolverARM move_resolver_; Thumb2Assembler assembler_; + const ArmInstructionSetFeatures* isa_features_; DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM); }; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 8b75cc7c65..4c06e23bbc 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -17,6 +17,7 @@ #include <functional> #include "arch/instruction_set.h" +#include "arch/arm/instruction_set_features_arm.h" #include "base/macros.h" #include "builder.h" #include "code_generator_arm.h" @@ -87,7 +88,7 @@ static void RunCodeBaseline(HGraph* graph, bool has_result, Expected expected) { Run(allocator, codegenX86, has_result, expected); } - arm::CodeGeneratorARM codegenARM(graph); + arm::CodeGeneratorARM codegenARM(graph, ArmInstructionSetFeatures::FromCppDefines()); codegenARM.CompileBaseline(&allocator, true); if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { Run(allocator, codegenARM, has_result, expected); @@ -130,7 +131,7 @@ static void RunCodeOptimized(HGraph* graph, bool has_result, Expected expected) { if (kRuntimeISA == kArm || kRuntimeISA == kThumb2) { - arm::CodeGeneratorARM codegenARM(graph); + arm::CodeGeneratorARM codegenARM(graph, ArmInstructionSetFeatures::FromCppDefines()); RunCodeOptimized(&codegenARM, graph, hook_before_codegen, has_result, expected); } else if (kRuntimeISA == kArm64) { arm64::CodeGeneratorARM64 codegenARM64(graph); diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 87f2b90775..1a27724d33 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -289,7 +289,9 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return nullptr; } - CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set); + CompilerDriver* compiler_driver = GetCompilerDriver(); + CodeGenerator* codegen = CodeGenerator::Create(&arena, graph, instruction_set, + *compiler_driver->GetInstructionSetFeatures()); if (codegen == nullptr) { CHECK(!shouldCompile) << "Could not find code generator for optimizing compiler"; compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen); @@ -315,7 +317,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, return nullptr; } RunOptimizations( - graph, GetCompilerDriver(), &compilation_stats_, dex_compilation_unit, visualizer); + graph, compiler_driver, &compilation_stats_, dex_compilation_unit, visualizer); PrepareForRegisterAllocation(graph).Run(); SsaLivenessAnalysis liveness(*graph, codegen); @@ -333,7 +335,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized); return CompiledMethod::SwapAllocCompiledMethodStackMap( - GetCompilerDriver(), + compiler_driver, instruction_set, ArrayRef<const uint8_t>(allocator.GetMemory()), codegen->GetFrameSize(), @@ -358,16 +360,15 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, std::vector<uint8_t> mapping_table; DefaultSrcMap src_mapping_table; - codegen->BuildMappingTable(&mapping_table, - GetCompilerDriver()->GetCompilerOptions().GetIncludeDebugSymbols() ? - &src_mapping_table : nullptr); + bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols(); + codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? &src_mapping_table : nullptr); std::vector<uint8_t> vmap_table; codegen->BuildVMapTable(&vmap_table); std::vector<uint8_t> gc_map; codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit); compilation_stats_.RecordStat(MethodCompilationStat::kCompiledBaseline); - return CompiledMethod::SwapAllocCompiledMethod(GetCompilerDriver(), + return CompiledMethod::SwapAllocCompiledMethod(compiler_driver, instruction_set, ArrayRef<const uint8_t>(allocator.GetMemory()), codegen->GetFrameSize(), diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc index 52fd736cdb..31e653bf92 100644 --- a/disassembler/disassembler_arm.cc +++ b/disassembler/disassembler_arm.cc @@ -21,6 +21,7 @@ #include <ostream> #include <sstream> +#include "arch/arm/registers_arm.h" #include "base/logging.h" #include "base/stringprintf.h" #include "thread.h" @@ -148,15 +149,15 @@ struct ThumbRegister : ArmRegister { ThumbRegister(uint16_t instruction, uint16_t at_bit) : ArmRegister((instruction >> at_bit) & 0x7) {} }; -struct Rm { - explicit Rm(uint32_t instruction) : shift((instruction >> 4) & 0xff), rm(instruction & 0xf) {} - uint32_t shift; +struct RmLslImm2 { + explicit RmLslImm2(uint32_t instr) : imm2((instr >> 4) & 0x3), rm(instr & 0xf) {} + uint32_t imm2; ArmRegister rm; }; -std::ostream& operator<<(std::ostream& os, const Rm& r) { +std::ostream& operator<<(std::ostream& os, const RmLslImm2& r) { os << r.rm; - if (r.shift != 0) { - os << "-shift-" << r.shift; // TODO + if (r.imm2 != 0) { + os << ", lsl #" << r.imm2; } return os; } @@ -397,7 +398,74 @@ static uint64_t VFPExpand64(uint32_t imm8) { uint64_t bit_a = (imm8 >> 7) & 1; uint64_t bit_b = (imm8 >> 6) & 1; uint64_t slice = imm8 & 0x3f; - return (bit_a << 31) | ((UINT64_C(1) << 62) - (bit_b << 54)) | (slice << 48); + return (bit_a << 63) | ((UINT64_C(1) << 62) - (bit_b << 54)) | (slice << 48); +} + +enum T2LitType { + kT2LitInvalid, + kT2LitUByte, + kT2LitSByte, + kT2LitUHalf, + kT2LitSHalf, + kT2LitUWord, + kT2LitSWord, + kT2LitHexWord, + kT2LitULong, + kT2LitSLong, + kT2LitHexLong, +}; +std::ostream& operator<<(std::ostream& os, T2LitType type) { + return os << static_cast<int>(type); +} + +void DumpThumb2Literal(std::ostream& args, const uint8_t* instr_ptr, uint32_t U, uint32_t imm32, + T2LitType type) { + // Literal offsets (imm32) are not required to be aligned so we may need unaligned access. + typedef const int16_t unaligned_int16_t __attribute__ ((aligned (1))); + typedef const uint16_t unaligned_uint16_t __attribute__ ((aligned (1))); + typedef const int32_t unaligned_int32_t __attribute__ ((aligned (1))); + typedef const uint32_t unaligned_uint32_t __attribute__ ((aligned (1))); + typedef const int64_t unaligned_int64_t __attribute__ ((aligned (1))); + typedef const uint64_t unaligned_uint64_t __attribute__ ((aligned (1))); + + uintptr_t pc = RoundDown(reinterpret_cast<intptr_t>(instr_ptr) + 4, 4); + uintptr_t lit_adr = U ? pc + imm32 : pc - imm32; + args << " ; "; + switch (type) { + case kT2LitUByte: + args << *reinterpret_cast<const uint8_t*>(lit_adr); + break; + case kT2LitSByte: + args << *reinterpret_cast<const int8_t*>(lit_adr); + break; + case kT2LitUHalf: + args << *reinterpret_cast<const unaligned_uint16_t*>(lit_adr); + break; + case kT2LitSHalf: + args << *reinterpret_cast<const unaligned_int16_t*>(lit_adr); + break; + case kT2LitUWord: + args << *reinterpret_cast<const unaligned_uint32_t*>(lit_adr); + break; + case kT2LitSWord: + args << *reinterpret_cast<const unaligned_int32_t*>(lit_adr); + break; + case kT2LitHexWord: + args << StringPrintf("0x%08x", *reinterpret_cast<const unaligned_uint32_t*>(lit_adr)); + break; + case kT2LitULong: + args << *reinterpret_cast<const unaligned_uint64_t*>(lit_adr); + break; + case kT2LitSLong: + args << *reinterpret_cast<const unaligned_int64_t*>(lit_adr); + break; + case kT2LitHexLong: + args << StringPrintf("0x%" PRIx64, *reinterpret_cast<unaligned_int64_t*>(lit_adr)); + break; + default: + LOG(FATAL) << "Invalid type: " << type; + break; + } } size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) { @@ -756,10 +824,7 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) args << d << ", [" << Rn << ", #" << ((U == 1) ? "" : "-") << (imm8 << 2) << "]"; if (Rn.r == 15 && U == 1) { - intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr); - lit_adr = RoundDown(lit_adr, 4) + 4 + (imm8 << 2); - typedef const int64_t unaligned_int64_t __attribute__ ((aligned (2))); - args << StringPrintf(" ; 0x%" PRIx64, *reinterpret_cast<unaligned_int64_t*>(lit_adr)); + DumpThumb2Literal(args, instr_ptr, U, imm8 << 2, kT2LitHexLong); } } else if (Rn.r == 13 && W == 1 && U == L) { // VPUSH/VPOP opcode << (L == 1 ? "vpop" : "vpush"); @@ -1227,164 +1292,141 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) break; case 3: switch (op2) { - case 0x00: case 0x02: case 0x04: case 0x06: // 000xxx0 - case 0x08: case 0x09: case 0x0A: case 0x0C: case 0x0E: { - // Store single data item - // |111|11|100|000|0|0000|1111|110000|000000| - // |5 3|21|098|765|4|3 0|5 2|10 6|5 0| - // |---|--|---|---|-|----|----|------|------| - // |332|22|222|222|2|1111|1111|110000|000000| - // |1 9|87|654|321|0|9 6|5 2|10 6|5 0| - // |---|--|---|---|-|----|----|------|------| - // |111|11|000|op3|0| | | op4 | | - uint32_t op3 = (instr >> 21) & 7; - // uint32_t op4 = (instr >> 6) & 0x3F; - switch (op3) { - case 0x0: case 0x4: { - // {ST,LD}RB Rt,[Rn,#+/-imm12] - 111 11 00 0 1 00 0 nnnn tttt 1 PUWii ii iiii - // {ST,LD}RB Rt,[Rn,#+/-imm8] - 111 11 00 0 0 00 0 nnnn tttt 1 PUWii ii iiii - // {ST,LD}RB Rt,[Rn,Rm,lsl #imm2] - 111 11 00 0 0 00 0 nnnn tttt 0 00000 ii mmmm - ArmRegister Rn(instr, 16); - ArmRegister Rt(instr, 12); - opcode << (HasBitSet(instr, 20) ? "ldrb" : "strb"); - if (HasBitSet(instr, 23)) { - uint32_t imm12 = instr & 0xFFF; - args << Rt << ", [" << Rn << ",#" << imm12 << "]"; - } else if ((instr & 0x800) != 0) { - uint32_t imm8 = instr & 0xFF; - args << Rt << ", [" << Rn << ",#" << imm8 << "]"; - } else { - uint32_t imm2 = (instr >> 4) & 3; - ArmRegister Rm(instr, 0); - args << Rt << ", [" << Rn << ", " << Rm; - if (imm2 != 0) { - args << ", " << "lsl #" << imm2; - } - args << "]"; - } - break; - } - case 0x1: case 0x5: { - // STRH Rt,[Rn,#+/-imm12] - 111 11 00 0 1 01 0 nnnn tttt 1 PUWii ii iiii - // STRH Rt,[Rn,#+/-imm8] - 111 11 00 0 0 01 0 nnnn tttt 1 PUWii ii iiii - // STRH Rt,[Rn,Rm,lsl #imm2] - 111 11 00 0 0 01 0 nnnn tttt 0 00000 ii mmmm - ArmRegister Rn(instr, 16); - ArmRegister Rt(instr, 12); - opcode << "strh"; - if (HasBitSet(instr, 23)) { - uint32_t imm12 = instr & 0xFFF; - args << Rt << ", [" << Rn << ",#" << imm12 << "]"; - } else if ((instr & 0x800) != 0) { - uint32_t imm8 = instr & 0xFF; - args << Rt << ", [" << Rn << ",#" << imm8 << "]"; - } else { - uint32_t imm2 = (instr >> 4) & 3; - ArmRegister Rm(instr, 0); - args << Rt << ", [" << Rn << ", " << Rm; - if (imm2 != 0) { - args << ", " << "lsl #" << imm2; - } - args << "]"; - } - break; - } - case 0x2: case 0x6: { - ArmRegister Rn(instr, 16); - ArmRegister Rt(instr, 12); - if (op3 == 2) { - if ((instr & 0x800) != 0) { - // STR Rt, [Rn, #imm8] - 111 11 000 010 0 nnnn tttt 1PUWiiiiiiii - uint32_t P = (instr >> 10) & 1; - uint32_t U = (instr >> 9) & 1; - uint32_t W = (instr >> 8) & 1; - uint32_t imm8 = instr & 0xFF; - int32_t imm32 = (imm8 << 24) >> 24; // sign-extend imm8 - if (Rn.r == 13 && P == 1 && U == 0 && W == 1 && imm32 == 4) { - opcode << "push"; - args << "{" << Rt << "}"; - } else if (Rn.r == 15 || (P == 0 && W == 0)) { - opcode << "UNDEFINED"; - } else { - if (P == 1 && U == 1 && W == 0) { - opcode << "strt"; - } else { - opcode << "str"; - } - args << Rt << ", [" << Rn; - if (P == 0 && W == 1) { - args << "], #" << imm32; - } else { - args << ", #" << imm32 << "]"; - if (W == 1) { - args << "!"; - } - } - } - } else { - // STR Rt, [Rn, Rm, LSL #imm2] - 111 11 000 010 0 nnnn tttt 000000iimmmm - ArmRegister Rm(instr, 0); - uint32_t imm2 = (instr >> 4) & 3; - opcode << "str.w"; - args << Rt << ", [" << Rn << ", " << Rm; - if (imm2 != 0) { - args << ", lsl #" << imm2; - } - args << "]"; - } - } else if (op3 == 6) { - // STR.W Rt, [Rn, #imm12] - 111 11 000 110 0 nnnn tttt iiiiiiiiiiii - uint32_t imm12 = instr & 0xFFF; - opcode << "str.w"; - args << Rt << ", [" << Rn << ", #" << imm12 << "]"; - } - break; - } - } - + case 0x07: case 0x0F: case 0x17: case 0x1F: { // Explicitly UNDEFINED, A6.3. + opcode << "UNDEFINED"; + break; + } + case 0x06: case 0x0E: { // "Store single data item" undefined opcodes, A6.3.10. + opcode << "UNDEFINED [store]"; + break; + } + case 0x15: case 0x1D: { // "Load word" undefined opcodes, A6.3.7. + opcode << "UNDEFINED [load]"; break; } - case 0x03: case 0x0B: case 0x11: case 0x13: case 0x19: case 0x1B: { // 00xx011 - // Load byte/halfword - // |111|11|10|0 0|00|0|0000|1111|110000|000000| - // |5 3|21|09|8 7|65|4|3 0|5 2|10 6|5 0| - // |---|--|--|---|--|-|----|----|------|------| - // |332|22|22|2 2|22|2|1111|1111|110000|000000| - // |1 9|87|65|4 3|21|0|9 6|5 2|10 6|5 0| - // |---|--|--|---|--|-|----|----|------|------| - // |111|11|00|op3|01|1| Rn | Rt | op4 | | - // |111|11| op2 | | | imm12 | - uint32_t op3 = (instr >> 23) & 3; + case 0x10: case 0x12: case 0x14: case 0x16: case 0x18: case 0x1A: case 0x1C: case 0x1E: { + opcode << "UNKNOWN " << op2 << " [SIMD]"; + break; + } + case 0x01: case 0x00: case 0x09: case 0x08: // {LD,ST}RB{,T} + case 0x03: case 0x02: case 0x0B: case 0x0A: // {LD,ST}RH{,T} + case 0x05: case 0x04: case 0x0D: case 0x0C: // {LD,ST}R{,T} + case 0x11: case 0x19: // LDRSB{,T} (no signed store) + case 0x13: case 0x1B: { // LDRSH{,T} (no signed store) + // Load: + // (Store is the same except that l==0 and always s==0 below.) + // 00s.whl (sign, word, half, load) + // LDR{S}B imm12: 11111|00s1001| Rn | Rt |imm12 (0x09) + // LDR{S}B imm8: 11111|00s0001| Rn | Rt |1PUW|imm8 (0x01) + // LDR{S}BT imm8: 11111|00s0001| Rn | Rt |1110|imm8 (0x01) + // LDR{S}B lit: 11111|00sU001|1111| Rt |imm12 (0x01/0x09) + // LDR{S}B reg: 11111|00s0001| Rn | Rt |000000|imm2| Rm (0x01) + // LDR{S}H imm12: 11111|00s1011| Rn | Rt |imm12 (0x0B) + // LDR{S}H imm8: 11111|00s0011| Rn | Rt |1PUW|imm8 (0x03) + // LDR{S}HT imm8: 11111|00s0011| Rn | Rt |1110|imm8 (0x03) + // LDR{S}H lit: 11111|00sU011|1111| Rt |imm12 (0x03/0x0B) + // LDR{S}H reg: 11111|00s0011| Rn | Rt |000000|imm2| Rm (0x03) + // LDR imm12: 11111|0001101| Rn | Rt |imm12 (0x0D) + // LDR imm8: 11111|0000101| Rn | Rt |1PUW|imm8 (0x05) + // LDRT imm8: 11111|0000101| Rn | Rt |1110|imm8 (0x05) + // LDR lit: 11111|000U101|1111| Rt |imm12 (0x05/0x0D) + // LDR reg: 11111|0000101| Rn | Rt |000000|imm2| Rm (0x05) + // + // If Rt == 15, instead of load we have preload: + // PLD{W} imm12: 11111|00010W1| Rn |1111|imm12 (0x09/0x0B) + // PLD{W} imm8: 11111|00000W1| Rn |1111|1100|imm8 (0x01/0x03); -imm8 + // PLD lit: 11111|000U001|1111|1111|imm12 (0x01/0x09) + // PLD{W} reg: 11111|00000W1| Rn |1111|000000|imm2| Rm (0x01/0x03) + // PLI imm12: 11111|0011001| Rn |1111|imm12 (0x19) + // PLI imm8: 11111|0010001| Rn |1111|1100|imm8 (0x11); -imm8 + // PLI lit: 11111|001U001|1111|1111|imm12 (0x01/0x09) + // PLI reg: 11111|0010001| Rn |1111|000000|imm2| Rm (0x01/0x03) + + bool is_load = HasBitSet(instr, 20); + bool is_half = HasBitSet(instr, 21); // W for PLD/PLDW. + bool is_word = HasBitSet(instr, 22); + bool is_signed = HasBitSet(instr, 24); ArmRegister Rn(instr, 16); ArmRegister Rt(instr, 12); - if (Rt.r != 15) { - if (op3 == 1) { - // LDRH.W Rt, [Rn, #imm12] - 111 11 00 01 011 nnnn tttt iiiiiiiiiiii - uint32_t imm12 = instr & 0xFFF; - opcode << "ldrh.w"; - args << Rt << ", [" << Rn << ", #" << imm12 << "]"; - if (Rn.r == 9) { - args << " ; "; - Thread::DumpThreadOffset<4>(args, imm12); - } else if (Rn.r == 15) { - intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr); - lit_adr = RoundDown(lit_adr, 4) + 4 + imm12; - args << StringPrintf(" ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr)); - } - } else if (op3 == 3) { - // LDRSH.W Rt, [Rn, #imm12] - 111 11 00 11 011 nnnn tttt iiiiiiiiiiii - // LDRSB.W Rt, [Rn, #imm12] - 111 11 00 11 001 nnnn tttt iiiiiiiiiiii - uint32_t imm12 = instr & 0xFFF; - opcode << (HasBitSet(instr, 20) ? "ldrsb.w" : "ldrsh.w"); - args << Rt << ", [" << Rn << ", #" << imm12 << "]"; - if (Rn.r == 9) { - args << " ; "; - Thread::DumpThreadOffset<4>(args, imm12); - } else if (Rn.r == 15) { - intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr); - lit_adr = RoundDown(lit_adr, 4) + 4 + imm12; - args << StringPrintf(" ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr)); + uint32_t imm12 = instr & 0xFFF; + uint32_t U = (instr >> 23) & 1; // U for imm12 + uint32_t imm8 = instr & 0xFF; + uint32_t op4 = (instr >> 8) & 0xF; // 1PUW for imm8 + if (Rt.r == PC && is_load && !is_word) { + // PLD, PLDW, PLI + const char* pld_pli = (is_signed ? "pli" : "pld"); + const char* w = (is_half ? "w" : ""); + if (is_signed && !is_half) { + opcode << "UNDEFINED [PLI+W]"; + } else if (Rn.r == PC || U != 0u) { + opcode << pld_pli << w; + args << "[" << Rn << ", #" << (U != 0u ? "" : "-") << imm12 << "]"; + if (Rn.r == PC && is_half) { + args << " (UNPREDICTABLE)"; } + } else if ((instr & 0xFC0) == 0) { + opcode << pld_pli << w; + RmLslImm2 Rm(instr); + args << "[" << Rn << ", " << Rm << "]"; + } else if (op4 == 0xC) { + opcode << pld_pli << w; + args << "[" << Rn << ", #-" << imm8 << "]"; + } else { + opcode << "UNDEFINED [~" << pld_pli << "]"; } + break; + } + const char* ldr_str = is_load ? "ldr" : "str"; + const char* sign = is_signed ? "s" : ""; + const char* type = is_word ? "" : is_half ? "h" : "b"; + bool unpred = (Rt.r == SP && !is_word) || (Rt.r == PC && !is_load); + if (Rn.r == PC && !is_load) { + opcode << "UNDEFINED [STR-lit]"; + unpred = false; + } else if (Rn.r == PC || U != 0u) { + // Load/store with imm12 (load literal if Rn.r == PC; there's no store literal). + opcode << ldr_str << sign << type << ".w"; + args << Rt << ", [" << Rn << ", #" << (U != 0u ? "" : "-") << imm12 << "]"; + if (Rn.r == TR && is_load) { + args << " ; "; + Thread::DumpThreadOffset<4>(args, imm12); + } else if (Rn.r == PC) { + T2LitType lit_type[] = { + kT2LitUByte, kT2LitUHalf, kT2LitHexWord, kT2LitInvalid, + kT2LitUByte, kT2LitUHalf, kT2LitHexWord, kT2LitInvalid, + kT2LitSByte, kT2LitSHalf, kT2LitInvalid, kT2LitInvalid, + kT2LitSByte, kT2LitSHalf, kT2LitInvalid, kT2LitInvalid, + }; + DCHECK_LT(op2 >> 1, arraysize(lit_type)); + DCHECK_NE(lit_type[op2 >> 1], kT2LitInvalid); + DumpThumb2Literal(args, instr_ptr, U, imm12, lit_type[op2 >> 1]); + } + } else if ((instr & 0xFC0) == 0) { + opcode << ldr_str << sign << type << ".w"; + RmLslImm2 Rm(instr); + args << Rt << ", [" << Rn << ", " << Rm << "]"; + unpred = unpred || (Rm.rm.r == SP) || (Rm.rm.r == PC); + } else if (is_word && Rn.r == SP && imm8 == 4 && op4 == (is_load ? 0xB : 0xD)) { + opcode << (is_load ? "pop" : "push") << ".w"; + args << Rn; + unpred = unpred || (Rn.r == SP); + } else if ((op4 & 5) == 0) { + opcode << "UNDEFINED [P = W = 0 for " << ldr_str << "]"; + unpred = false; + } else { + uint32_t P = (instr >> 10) & 1; + U = (instr >> 9) & 1; + uint32_t W = (instr >> 8) & 1; + bool pre_index = (P != 0 && W == 1); + bool post_index = (P == 0 && W == 1); + const char* t = (P != 0 && U != 0 && W == 0) ? "t" : ""; // Unprivileged load/store? + opcode << ldr_str << sign << type << t << ".w"; + args << Rt << ", [" << Rn << (post_index ? "]" : "") << ", #" << (U != 0 ? "" : "-") + << imm8 << (post_index ? "" : "]") << (pre_index ? "!" : ""); + unpred = (W != 0 && Rn.r == Rt.r); + } + if (unpred) { + args << " (UNPREDICTABLE)"; } break; } @@ -1413,75 +1455,6 @@ size_t DisassemblerArm::DumpThumb32(std::ostream& os, const uint8_t* instr_ptr) } // else unknown instruction break; } - case 0x05: case 0x0D: case 0x15: case 0x1D: { // 00xx101 - // Load word - // |111|11|10|0 0|00|0|0000|1111|110000|000000| - // |5 3|21|09|8 7|65|4|3 0|5 2|10 6|5 0| - // |---|--|--|---|--|-|----|----|------|------| - // |332|22|22|2 2|22|2|1111|1111|110000|000000| - // |1 9|87|65|4 3|21|0|9 6|5 2|10 6|5 0| - // |---|--|--|---|--|-|----|----|------|------| - // |111|11|00|op3|10|1| Rn | Rt | op4 | | - // |111|11| op2 | | | imm12 | - uint32_t op3 = (instr >> 23) & 3; - uint32_t op4 = (instr >> 6) & 0x3F; - ArmRegister Rn(instr, 16); - ArmRegister Rt(instr, 12); - if (op3 == 1 || Rn.r == 15) { - // LDR.W Rt, [Rn, #imm12] - 111 11 00 00 101 nnnn tttt iiiiiiiiiiii - // LDR.W Rt, [PC, #imm12] - 111 11 00 0x 101 1111 tttt iiiiiiiiiiii - uint32_t imm12 = instr & 0xFFF; - opcode << "ldr.w"; - args << Rt << ", [" << Rn << ", #" << imm12 << "]"; - if (Rn.r == 9) { - args << " ; "; - Thread::DumpThreadOffset<4>(args, imm12); - } else if (Rn.r == 15) { - intptr_t lit_adr = reinterpret_cast<intptr_t>(instr_ptr); - lit_adr = RoundDown(lit_adr, 4) + 4 + imm12; - args << StringPrintf(" ; 0x%08x", *reinterpret_cast<int32_t*>(lit_adr)); - } - } else if (op4 == 0) { - // LDR.W Rt, [Rn, Rm{, LSL #imm2}] - 111 11 00 00 101 nnnn tttt 000000iimmmm - uint32_t imm2 = (instr >> 4) & 0xF; - ArmRegister rm(instr, 0); - opcode << "ldr.w"; - args << Rt << ", [" << Rn << ", " << rm; - if (imm2 != 0) { - args << ", lsl #" << imm2; - } - args << "]"; - } else { - bool p = (instr & (1 << 10)) != 0; - bool w = (instr & (1 << 8)) != 0; - bool u = (instr & (1 << 9)) != 0; - if (p && u && !w) { - // LDRT Rt, [Rn, #imm8] - 111 11 00 00 101 nnnn tttt 1110iiiiiiii - uint32_t imm8 = instr & 0xFF; - opcode << "ldrt"; - args << Rt << ", [" << Rn << ", #" << imm8 << "]"; - } else if (Rn.r == 13 && !p && u && w && (instr & 0xff) == 4) { - // POP - opcode << "pop"; - args << "{" << Rt << "}"; - } else { - bool wback = !p || w; - uint32_t offset = (instr & 0xff); - opcode << "ldr.w"; - args << Rt << ","; - if (p && !wback) { - args << "[" << Rn << ", #" << offset << "]"; - } else if (p && wback) { - args << "[" << Rn << ", #" << offset << "]!"; - } else if (!p && wback) { - args << "[" << Rn << "], #" << offset; - } else { - LOG(FATAL) << p << " " << w; - } - } - } - break; - } default: // more formats if ((op2 >> 4) == 2) { // 010xxxx // data processing (register) @@ -1808,6 +1781,23 @@ size_t DisassemblerArm::DumpThumb16(std::ostream& os, const uint8_t* instr_ptr) DumpBranchTarget(args, instr_ptr + 4, imm32); break; } + case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x26: case 0x27: + case 0x28: case 0x29: case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: case 0x2F: { + opcode << "push"; + args << RegisterList((instr & 0xFF) | ((instr & 0x100) << 6)); + break; + } + case 0x60: case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: + case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: { + opcode << "pop"; + args << RegisterList((instr & 0xFF) | ((instr & 0x100) << 7)); + break; + } + case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: { + opcode << "bkpt"; + args << "#" << (instr & 0xFF); + break; + } case 0x50: case 0x51: // 101000x case 0x52: case 0x53: // 101001x case 0x56: case 0x57: { // 101011x |