Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator.cc                 |  19
-rw-r--r--  compiler/optimizing/code_generator.h                  |   5
-rw-r--r--  compiler/optimizing/code_generator_arm.cc             |  31
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc           |  86
-rw-r--r--  compiler/optimizing/code_generator_arm64.h            |   2
-rw-r--r--  compiler/optimizing/code_generator_utils.cc           |  97
-rw-r--r--  compiler/optimizing/code_generator_utils.h            |  30
-rw-r--r--  compiler/optimizing/code_generator_x86.cc             | 560
-rw-r--r--  compiler/optimizing/code_generator_x86.h              |   7
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc          | 525
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h           |  18
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc         | 228
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc              |  59
-rw-r--r--  compiler/optimizing/nodes.h                           |  10
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc            | 127
-rw-r--r--  compiler/optimizing/optimizing_cfi_test_expected.inc  | 141
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc            |  44
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h       |   2
-rw-r--r--  compiler/optimizing/parallel_move_resolver.cc         |  24
-rw-r--r--  compiler/optimizing/parallel_move_resolver.h          |   7
20 files changed, 1795 insertions, 227 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index da28dc7ecb..8736374306 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -82,6 +82,7 @@ void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline)
HGraphVisitor* instruction_visitor = GetInstructionVisitor();
DCHECK_EQ(current_block_index_, 0u);
GenerateFrameEntry();
+ DCHECK_EQ(GetAssembler()->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size_));
for (size_t e = block_order_->Size(); current_block_index_ < e; ++current_block_index_) {
HBasicBlock* block = block_order_->Get(current_block_index_);
// Don't generate code for an empty block. Its predecessors will branch to its successor
@@ -415,7 +416,16 @@ void CodeGenerator::BuildNativeGCMap(
}
}
-void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap* src_map) const {
+void CodeGenerator::BuildSourceMap(DefaultSrcMap* src_map) const {
+ for (size_t i = 0; i < pc_infos_.Size(); i++) {
+ struct PcInfo pc_info = pc_infos_.Get(i);
+ uint32_t pc2dex_offset = pc_info.native_pc;
+ int32_t pc2dex_dalvik_offset = pc_info.dex_pc;
+ src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset}));
+ }
+}
+
+void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data) const {
uint32_t pc2dex_data_size = 0u;
uint32_t pc2dex_entries = pc_infos_.Size();
uint32_t pc2dex_offset = 0u;
@@ -425,19 +435,12 @@ void CodeGenerator::BuildMappingTable(std::vector<uint8_t>* data, DefaultSrcMap*
uint32_t dex2pc_offset = 0u;
int32_t dex2pc_dalvik_offset = 0;
- if (src_map != nullptr) {
- src_map->reserve(pc2dex_entries);
- }
-
for (size_t i = 0; i < pc2dex_entries; i++) {
struct PcInfo pc_info = pc_infos_.Get(i);
pc2dex_data_size += UnsignedLeb128Size(pc_info.native_pc - pc2dex_offset);
pc2dex_data_size += SignedLeb128Size(pc_info.dex_pc - pc2dex_dalvik_offset);
pc2dex_offset = pc_info.native_pc;
pc2dex_dalvik_offset = pc_info.dex_pc;
- if (src_map != nullptr) {
- src_map->push_back(SrcMapElem({pc2dex_offset, pc2dex_dalvik_offset}));
- }
}
// Walk over the blocks and find which ones correspond to catch block entries.
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 07ca6b1ccf..b888aca264 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -205,7 +205,8 @@ class CodeGenerator {
slow_paths_.Add(slow_path);
}
- void BuildMappingTable(std::vector<uint8_t>* vector, DefaultSrcMap* src_map) const;
+ void BuildSourceMap(DefaultSrcMap* src_map) const;
+ void BuildMappingTable(std::vector<uint8_t>* vector) const;
void BuildVMapTable(std::vector<uint8_t>* vector) const;
void BuildNativeGCMap(
std::vector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
@@ -425,6 +426,8 @@ class CodeGenerator {
StackMapStream stack_map_stream_;
+ friend class OptimizingCFITest;
+
DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
};
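
Note: the old combined entry point is split so that callers that do not need a source map no longer pay for building one. A minimal sketch of the resulting calling pattern, assuming the call site (in optimizing_compiler.cc, not shown in this excerpt) gates the source map on a debug-symbols flag such as CompilerOptions::GetIncludeDebugSymbols():

    DefaultSrcMap src_map;
    if (compiler_options.GetIncludeDebugSymbols()) {
      codegen->BuildSourceMap(&src_map);           // plain (native_pc, dex_pc) pairs
    }
    std::vector<uint8_t> mapping_table;
    codegen->BuildMappingTable(&mapping_table);    // LEB128 delta-encoded table
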
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index cfc798a34e..a799a519c0 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -513,6 +513,14 @@ void CodeGeneratorARM::ComputeSpillMask() {
}
}
+static dwarf::Reg DWARFReg(Register reg) {
+ return dwarf::Reg::ArmCore(static_cast<int>(reg));
+}
+
+static dwarf::Reg DWARFReg(SRegister reg) {
+ return dwarf::Reg::ArmFp(static_cast<int>(reg));
+}
+
void CodeGeneratorARM::GenerateFrameEntry() {
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
@@ -531,12 +539,19 @@ void CodeGeneratorARM::GenerateFrameEntry() {
// PC is in the list of callee-save to mimic Quick, but we need to push
// LR at entry instead.
- __ PushList((core_spill_mask_ & (~(1 << PC))) | 1 << LR);
+ uint32_t push_mask = (core_spill_mask_ & (~(1 << PC))) | 1 << LR;
+ __ PushList(push_mask);
+ __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(push_mask));
+ __ cfi().RelOffsetForMany(DWARFReg(Register(0)), 0, push_mask, kArmWordSize);
if (fpu_spill_mask_ != 0) {
SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
__ vpushs(start_register, POPCOUNT(fpu_spill_mask_));
+ __ cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
+ __ cfi().RelOffsetForMany(DWARFReg(SRegister(0)), 0, fpu_spill_mask_, kArmWordSize);
}
- __ AddConstant(SP, -(GetFrameSize() - FrameEntrySpillSize()));
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ AddConstant(SP, -adjust);
+ __ cfi().AdjustCFAOffset(adjust);
__ StoreToOffset(kStoreWord, R0, SP, 0);
}
@@ -545,10 +560,14 @@ void CodeGeneratorARM::GenerateFrameExit() {
__ bx(LR);
return;
}
- __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize());
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ AddConstant(SP, adjust);
+ __ cfi().AdjustCFAOffset(-adjust);
if (fpu_spill_mask_ != 0) {
SRegister start_register = SRegister(LeastSignificantBit(fpu_spill_mask_));
__ vpops(start_register, POPCOUNT(fpu_spill_mask_));
+ __ cfi().AdjustCFAOffset(-kArmPointerSize * POPCOUNT(fpu_spill_mask_));
+ __ cfi().RestoreMany(DWARFReg(SRegister(0)), fpu_spill_mask_);
}
__ PopList(core_spill_mask_);
}
@@ -1190,7 +1209,10 @@ void LocationsBuilderARM::VisitReturnVoid(HReturnVoid* ret) {
void InstructionCodeGeneratorARM::VisitReturnVoid(HReturnVoid* ret) {
UNUSED(ret);
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderARM::VisitReturn(HReturn* ret) {
@@ -1201,7 +1223,10 @@ void LocationsBuilderARM::VisitReturn(HReturn* ret) {
void InstructionCodeGeneratorARM::VisitReturn(HReturn* ret) {
UNUSED(ret);
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
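
Note: the RememberState()/RestoreState() pairs added around every frame exit follow one pattern. CFI opcodes are replayed linearly over the method body, and a method may return from the middle of its code, so the generator snapshots the in-frame state before emitting the exit and re-asserts it (plus the CFA offset) for the blocks that follow. A minimal sketch of the pattern, with frame_size standing in for codegen_->GetFrameSize():

    __ cfi().RememberState();           // snapshot: CFA offset == frame_size here
    codegen_->GenerateFrameExit();      // restores registers, adjusts CFA, returns
    __ cfi().RestoreState();            // back to the in-frame CFI state...
    __ cfi().DefCFAOffset(frame_size);  // ...so the blocks that follow unwind correctly
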
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 439e85ca6c..5fe8adc86a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -465,20 +465,67 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
// ... : reserved frame space.
// sp[0] : current method.
__ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
- __ PokeCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
- __ PokeCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+ GetAssembler()->cfi().AdjustCFAOffset(frame_size);
+ SpillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+ SpillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
}
}
void CodeGeneratorARM64::GenerateFrameExit() {
if (!HasEmptyFrame()) {
int frame_size = GetFrameSize();
- __ PeekCPURegList(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
- __ PeekCPURegList(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
+ UnspillRegisters(GetFramePreservedFPRegisters(), frame_size - FrameEntrySpillSize());
+ UnspillRegisters(GetFramePreservedCoreRegisters(), frame_size - GetCoreSpillSize());
__ Drop(frame_size);
+ GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
}
}
+static inline dwarf::Reg DWARFReg(CPURegister reg) {
+ if (reg.IsFPRegister()) {
+ return dwarf::Reg::Arm64Fp(reg.code());
+ } else {
+ DCHECK_LT(reg.code(), 31u); // X0 - X30.
+ return dwarf::Reg::Arm64Core(reg.code());
+ }
+}
+
+void CodeGeneratorARM64::SpillRegisters(vixl::CPURegList registers, int offset) {
+ int size = registers.RegisterSizeInBytes();
+ while (registers.Count() >= 2) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ const CPURegister& dst1 = registers.PopLowestIndex();
+ __ Stp(dst0, dst1, MemOperand(__ StackPointer(), offset));
+ GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset);
+ GetAssembler()->cfi().RelOffset(DWARFReg(dst1), offset + size);
+ offset += 2 * size;
+ }
+ if (!registers.IsEmpty()) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ __ Str(dst0, MemOperand(__ StackPointer(), offset));
+ GetAssembler()->cfi().RelOffset(DWARFReg(dst0), offset);
+ }
+ DCHECK(registers.IsEmpty());
+}
+
+void CodeGeneratorARM64::UnspillRegisters(vixl::CPURegList registers, int offset) {
+ int size = registers.RegisterSizeInBytes();
+ while (registers.Count() >= 2) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ const CPURegister& dst1 = registers.PopLowestIndex();
+ __ Ldp(dst0, dst1, MemOperand(__ StackPointer(), offset));
+ GetAssembler()->cfi().Restore(DWARFReg(dst0));
+ GetAssembler()->cfi().Restore(DWARFReg(dst1));
+ offset += 2 * size;
+ }
+ if (!registers.IsEmpty()) {
+ const CPURegister& dst0 = registers.PopLowestIndex();
+ __ Ldr(dst0, MemOperand(__ StackPointer(), offset));
+ GetAssembler()->cfi().Restore(DWARFReg(dst0));
+ }
+ DCHECK(registers.IsEmpty());
+}
+
void CodeGeneratorARM64::Bind(HBasicBlock* block) {
__ Bind(GetLabelOf(block));
}
@@ -1659,11 +1706,26 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct
Register lhs = InputRegisterAt(condition, 0);
Operand rhs = InputOperandAt(condition, 1);
Condition arm64_cond = ARM64Condition(condition->GetCondition());
- if ((arm64_cond == eq || arm64_cond == ne) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
- if (arm64_cond == eq) {
- __ Cbz(lhs, true_target);
- } else {
- __ Cbnz(lhs, true_target);
+ if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
+ switch (arm64_cond) {
+ case eq:
+ __ Cbz(lhs, true_target);
+ break;
+ case ne:
+ __ Cbnz(lhs, true_target);
+ break;
+ case lt:
+ // Test the sign bit and branch accordingly.
+ __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+ break;
+ case ge:
+ // Test the sign bit and branch accordingly.
+ __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+ break;
+ default:
+ // Without the `static_cast` the compiler throws an error for
+ // `-Werror=sign-promo`.
+ LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
}
} else {
__ Cmp(lhs, rhs);
@@ -2403,8 +2465,11 @@ void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
void InstructionCodeGeneratorARM64::VisitReturn(HReturn* instruction) {
UNUSED(instruction);
+ GetAssembler()->cfi().RememberState();
codegen_->GenerateFrameExit();
__ Ret();
+ GetAssembler()->cfi().RestoreState();
+ GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
@@ -2413,8 +2478,11 @@ void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction) {
UNUSED(instruction);
+ GetAssembler()->cfi().RememberState();
codegen_->GenerateFrameExit();
__ Ret();
+ GetAssembler()->cfi().RestoreState();
+ GetAssembler()->cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderARM64::VisitShl(HShl* shl) {
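
Note: the new lt/ge cases ride on the fact that, against zero, `lt` is exactly the sign bit, so a compare-and-branch collapses to a single test-bit-and-branch. A self-contained check of the identity Tbnz/Tbz rely on (plain C++, hypothetical test harness):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t vals[] = {INT64_MIN, -1, 0, 1, INT64_MAX};
      for (int64_t x : vals) {
        bool lt = (x < 0);
        // Bit (kXRegSize - 1), i.e. bit 63, is the bit Tbnz/Tbz inspect.
        bool sign = ((static_cast<uint64_t>(x) >> 63) & 1) != 0;
        assert(lt == sign);  // Tbnz (bit set) == lt; Tbz (bit clear) == ge
      }
      return 0;
    }
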
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 7edb129880..9430e31037 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -227,6 +227,8 @@ class CodeGeneratorARM64 : public CodeGenerator {
void GenerateFrameEntry() OVERRIDE;
void GenerateFrameExit() OVERRIDE;
+ void SpillRegisters(vixl::CPURegList registers, int offset);
+ void UnspillRegisters(vixl::CPURegList registers, int offset);
vixl::CPURegList GetFramePreservedCoreRegisters() const {
return vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize,
diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc
new file mode 100644
index 0000000000..921c1d86c2
--- /dev/null
+++ b/compiler/optimizing/code_generator_utils.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_utils.h"
+
+#include "base/logging.h"
+
+namespace art {
+
+void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long,
+ int64_t* magic, int* shift) {
+ // It does not make sense to calculate magic and shift for zero divisor.
+ DCHECK_NE(divisor, 0);
+
+ /* Implementation according to H. S. Warren's "Hacker's Delight" (Addison Wesley, 2002),
+ * Chapter 10, and T. Granlund, P. L. Montgomery's "Division by Invariant Integers Using
+ * Multiplication" (PLDI 1994).
+ * The magic number M and shift S can be calculated in the following way:
+ * Let nc be the most positive value of numerator(n) such that nc = kd - 1,
+ * where divisor(d) >= 2.
+ * Let nc be the most negative value of numerator(n) such that nc = kd + 1,
+ * where divisor(d) <= -2.
+ * Thus nc can be calculated like:
+ * nc = exp - exp % d - 1, where d >= 2 and exp = 2^31 for int or 2^63 for long
+ * nc = -exp + (exp + 1) % d, where d <= -2 and exp = 2^31 for int or 2^63 for long
+ *
+ * So the shift p is the smallest p satisfying
+ * 2^p > nc * (d - 2^p % d), where d >= 2
+ * 2^p > nc * (d + 2^p % d), where d <= -2.
+ *
+ * The magic number M is calculated by
+ * M = (2^p + d - 2^p % d) / d, where d >= 2
+ * M = (2^p - d - 2^p % d) / d, where d <= -2.
+ *
+ * Notice that p is always greater than or equal to 32 (resp. 64), so we just return p - 32
+ * (resp. p - 64) as the shift number S.
+ */
+
+ int64_t p = is_long ? 63 : 31;
+ const uint64_t exp = is_long ? (UINT64_C(1) << 63) : (UINT32_C(1) << 31);
+
+ // Initialize the computations.
+ uint64_t abs_d = (divisor >= 0) ? divisor : -divisor;
+ uint64_t sign_bit = is_long ? static_cast<uint64_t>(divisor) >> 63 :
+ static_cast<uint32_t>(divisor) >> 31;
+ uint64_t tmp = exp + sign_bit;
+ uint64_t abs_nc = tmp - 1 - (tmp % abs_d);
+ uint64_t quotient1 = exp / abs_nc;
+ uint64_t remainder1 = exp % abs_nc;
+ uint64_t quotient2 = exp / abs_d;
+ uint64_t remainder2 = exp % abs_d;
+
+ /*
+ * To avoid handling both positive and negative divisor, "Hacker's Delight"
+ * introduces a method to handle these 2 cases together to avoid duplication.
+ */
+ uint64_t delta;
+ do {
+ p++;
+ quotient1 = 2 * quotient1;
+ remainder1 = 2 * remainder1;
+ if (remainder1 >= abs_nc) {
+ quotient1++;
+ remainder1 = remainder1 - abs_nc;
+ }
+ quotient2 = 2 * quotient2;
+ remainder2 = 2 * remainder2;
+ if (remainder2 >= abs_d) {
+ quotient2++;
+ remainder2 = remainder2 - abs_d;
+ }
+ delta = abs_d - remainder2;
+ } while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
+
+ *magic = (divisor > 0) ? (quotient2 + 1) : (-quotient2 - 1);
+
+ if (!is_long) {
+ *magic = static_cast<int>(*magic);
+ }
+
+ *shift = is_long ? p - 64 : p - 32;
+}
+
+} // namespace art
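
Note: as a sanity check on the routine above, for the 32-bit divisor d = 7 it produces magic = 0x92492493 and shift = 2 (matching Hacker's Delight, Table 10-1), and a quotient recovered the way the x86 back ends recover it agrees with plain division. A stand-alone sketch (assumes arithmetic right shift of signed values, which the targeted back ends guarantee):

    #include <cassert>
    #include <cstdint>

    // Mirrors the emitted sequence: high half of the 64-bit product, a
    // corrective add (magic < 0 while d > 0), the shift, then the sign bit.
    int32_t DivBy7(int32_t n) {
      const int32_t magic = static_cast<int32_t>(0x92492493);  // -1840700269
      const int shift = 2;
      int64_t product = static_cast<int64_t>(magic) * n;       // EDX:EAX
      int32_t hi = static_cast<int32_t>(product >> 32);        // EDX
      hi += n;                                // imm > 0 && magic < 0: addl
      hi >>= shift;                           // sarl
      hi += static_cast<uint32_t>(hi) >> 31;  // +1 if negative: round to zero
      return hi;
    }

    int main() {
      const int32_t vals[] = {INT32_MIN, -100, -7, -6, -1, 0, 1, 6, 7, 100, INT32_MAX};
      for (int32_t n : vals) {
        assert(DivBy7(n) == n / 7);
      }
      return 0;
    }
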
diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h
new file mode 100644
index 0000000000..59b495c2c9
--- /dev/null
+++ b/compiler/optimizing/code_generator_utils.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
+#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
+
+#include <cstdint>
+
+namespace art {
+
+// Computes the magic number and the shift needed in the div/rem-by-constant algorithm, as out
+// arguments `magic` and `shift`.
+void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift);
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 92b62e2c84..a6fb07fa98 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -16,6 +16,7 @@
#include "code_generator_x86.h"
+#include "code_generator_utils.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
@@ -459,7 +460,12 @@ InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGene
assembler_(codegen->GetAssembler()),
codegen_(codegen) {}
+static dwarf::Reg DWARFReg(Register reg) {
+ return dwarf::Reg::X86Core(static_cast<int>(reg));
+}
+
void CodeGeneratorX86::GenerateFrameEntry() {
+ __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
__ Bind(&frame_entry_label_);
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
@@ -478,10 +484,14 @@ void CodeGeneratorX86::GenerateFrameEntry() {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
__ pushl(reg);
+ __ cfi().AdjustCFAOffset(kX86WordSize);
+ __ cfi().RelOffset(DWARFReg(reg), 0);
}
}
- __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ subl(ESP, Immediate(adjust));
+ __ cfi().AdjustCFAOffset(adjust);
__ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
}
@@ -490,12 +500,16 @@ void CodeGeneratorX86::GenerateFrameExit() {
return;
}
- __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ addl(ESP, Immediate(adjust));
+ __ cfi().AdjustCFAOffset(-adjust);
for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
__ popl(reg);
+ __ cfi().AdjustCFAOffset(-static_cast<int>(kX86WordSize));
+ __ cfi().Restore(DWARFReg(reg));
}
}
}
@@ -1102,8 +1116,11 @@ void LocationsBuilderX86::VisitReturnVoid(HReturnVoid* ret) {
void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret) {
UNUSED(ret);
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
__ ret();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderX86::VisitReturn(HReturn* ret) {
@@ -1161,8 +1178,11 @@ void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) {
LOG(FATAL) << "Unknown return type " << ret->InputAt(0)->GetType();
}
}
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
__ ret();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
@@ -2278,6 +2298,133 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
__ addl(ESP, Immediate(2 * elem_size));
}
+
+void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(1).IsConstant());
+ DCHECK(locations->InAt(1).GetConstant()->IsIntConstant());
+
+ Register out_register = locations->Out().AsRegister<Register>();
+ Register input_register = locations->InAt(0).AsRegister<Register>();
+ int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+
+ DCHECK(imm == 1 || imm == -1);
+
+ if (instruction->IsRem()) {
+ __ xorl(out_register, out_register);
+ } else {
+ __ movl(out_register, input_register);
+ if (imm == -1) {
+ __ negl(out_register);
+ }
+ }
+}
+
+
+void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+
+ Register out_register = locations->Out().AsRegister<Register>();
+ Register input_register = locations->InAt(0).AsRegister<Register>();
+ int32_t imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+
+ DCHECK(IsPowerOfTwo(std::abs(imm)));
+ Register num = locations->GetTemp(0).AsRegister<Register>();
+
+ __ leal(num, Address(input_register, std::abs(imm) - 1));
+ __ testl(input_register, input_register);
+ __ cmovl(kGreaterEqual, num, input_register);
+ int shift = CTZ(imm);
+ __ sarl(num, Immediate(shift));
+
+ if (imm < 0) {
+ __ negl(num);
+ }
+
+ __ movl(out_register, num);
+}
+
+void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+ LocationSummary* locations = instruction->GetLocations();
+ int imm = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+
+ Register eax = locations->InAt(0).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+ Register num;
+ Register edx;
+
+ if (instruction->IsDiv()) {
+ edx = locations->GetTemp(0).AsRegister<Register>();
+ num = locations->GetTemp(1).AsRegister<Register>();
+ } else {
+ edx = locations->Out().AsRegister<Register>();
+ num = locations->GetTemp(0).AsRegister<Register>();
+ }
+
+ DCHECK_EQ(EAX, eax);
+ DCHECK_EQ(EDX, edx);
+ if (instruction->IsDiv()) {
+ DCHECK_EQ(EAX, out);
+ } else {
+ DCHECK_EQ(EDX, out);
+ }
+
+ int64_t magic;
+ int shift;
+ CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+
+ Label ndiv;
+ Label end;
+ // If numerator is 0, the result is 0, no computation needed.
+ __ testl(eax, eax);
+ __ j(kNotEqual, &ndiv);
+
+ __ xorl(out, out);
+ __ jmp(&end);
+
+ __ Bind(&ndiv);
+
+ // Save the numerator.
+ __ movl(num, eax);
+
+ // EAX = magic
+ __ movl(eax, Immediate(magic));
+
+ // EDX:EAX = magic * numerator
+ __ imull(num);
+
+ if (imm > 0 && magic < 0) {
+ // EDX += num
+ __ addl(edx, num);
+ } else if (imm < 0 && magic > 0) {
+ __ subl(edx, num);
+ }
+
+ // Shift if needed.
+ if (shift != 0) {
+ __ sarl(edx, Immediate(shift));
+ }
+
+ // EDX += 1 if EDX < 0
+ __ movl(eax, edx);
+ __ shrl(edx, Immediate(31));
+ __ addl(edx, eax);
+
+ if (instruction->IsRem()) {
+ __ movl(eax, num);
+ __ imull(edx, Immediate(imm));
+ __ subl(eax, edx);
+ __ movl(edx, eax);
+ } else {
+ __ movl(eax, edx);
+ }
+ __ Bind(&end);
+}
+
void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
@@ -2289,28 +2436,42 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr
switch (instruction->GetResultType()) {
case Primitive::kPrimInt: {
- Register second_reg = second.AsRegister<Register>();
DCHECK_EQ(EAX, first.AsRegister<Register>());
DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
- SlowPathCodeX86* slow_path =
- new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(),
- is_div);
- codegen_->AddSlowPath(slow_path);
+ if (instruction->InputAt(1)->IsIntConstant()) {
+ int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
- // 0x80000000/-1 triggers an arithmetic exception!
- // Dividing by -1 is actually negation and -0x800000000 = 0x80000000 so
- // it's safe to just use negl instead of more complex comparisons.
+ if (imm == 0) {
+ // Do not generate anything for 0: a DivZeroCheck guards this case, so this code is never reached.
+ } else if (imm == 1 || imm == -1) {
+ DivRemOneOrMinusOne(instruction);
+ } else if (is_div && IsPowerOfTwo(std::abs(imm))) {
+ DivByPowerOfTwo(instruction->AsDiv());
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ GenerateDivRemWithAnyConstant(instruction);
+ }
+ } else {
+ SlowPathCodeX86* slow_path =
+ new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86(out.AsRegister<Register>(),
+ is_div);
+ codegen_->AddSlowPath(slow_path);
- __ cmpl(second_reg, Immediate(-1));
- __ j(kEqual, slow_path->GetEntryLabel());
+ Register second_reg = second.AsRegister<Register>();
+ // 0x80000000/-1 triggers an arithmetic exception!
+ // Dividing by -1 is actually negation, and -0x80000000 = 0x80000000, so
+ // it's safe to just use negl instead of more complex comparisons.
- // edx:eax <- sign-extended of eax
- __ cdq();
- // eax = quotient, edx = remainder
- __ idivl(second_reg);
+ __ cmpl(second_reg, Immediate(-1));
+ __ j(kEqual, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ // edx:eax <- sign-extended of eax
+ __ cdq();
+ // eax = quotient, edx = remainder
+ __ idivl(second_reg);
+ __ Bind(slow_path->GetExitLabel());
+ }
break;
}
@@ -2350,10 +2511,16 @@ void LocationsBuilderX86::VisitDiv(HDiv* div) {
switch (div->GetResultType()) {
case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RegisterLocation(EAX));
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
// Intel uses edx:eax as the dividend.
locations->AddTemp(Location::RegisterLocation(EDX));
+ // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
+ // which enforces results to be in EAX and EDX, things are simpler if we use EAX also as
+ // output and request another temp.
+ if (div->InputAt(1)->IsIntConstant()) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
break;
}
case Primitive::kPrimLong: {
@@ -2411,6 +2578,7 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
void LocationsBuilderX86::VisitRem(HRem* rem) {
Primitive::Type type = rem->GetResultType();
+
LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong)
? LocationSummary::kCall
: LocationSummary::kNoCall;
@@ -2419,8 +2587,14 @@ void LocationsBuilderX86::VisitRem(HRem* rem) {
switch (type) {
case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RegisterLocation(EAX));
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
locations->SetOut(Location::RegisterLocation(EDX));
+ // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
+ // which enforces results to be in EAX and EDX, things are simpler if we use EDX also as
+ // output and request another temp.
+ if (rem->InputAt(1)->IsIntConstant()) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
break;
}
case Primitive::kPrimLong: {
@@ -2538,16 +2712,16 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) {
switch (op->GetResultType()) {
case Primitive::kPrimInt: {
- locations->SetInAt(0, Location::RequiresRegister());
- // The shift count needs to be in CL.
+ locations->SetInAt(0, Location::Any());
+ // The shift count needs to be in CL or a constant.
locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
break;
}
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- // The shift count needs to be in CL.
- locations->SetInAt(1, Location::RegisterLocation(ECX));
+ // The shift count needs to be in CL or a constant.
+ locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, op->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2566,38 +2740,87 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
switch (op->GetResultType()) {
case Primitive::kPrimInt: {
- Register first_reg = first.AsRegister<Register>();
- if (second.IsRegister()) {
- Register second_reg = second.AsRegister<Register>();
- DCHECK_EQ(ECX, second_reg);
- if (op->IsShl()) {
- __ shll(first_reg, second_reg);
- } else if (op->IsShr()) {
- __ sarl(first_reg, second_reg);
+ if (first.IsRegister()) {
+ Register first_reg = first.AsRegister<Register>();
+ if (second.IsRegister()) {
+ Register second_reg = second.AsRegister<Register>();
+ DCHECK_EQ(ECX, second_reg);
+ if (op->IsShl()) {
+ __ shll(first_reg, second_reg);
+ } else if (op->IsShr()) {
+ __ sarl(first_reg, second_reg);
+ } else {
+ __ shrl(first_reg, second_reg);
+ }
} else {
- __ shrl(first_reg, second_reg);
+ int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue;
+ if (shift == 0) {
+ return;
+ }
+ Immediate imm(shift);
+ if (op->IsShl()) {
+ __ shll(first_reg, imm);
+ } else if (op->IsShr()) {
+ __ sarl(first_reg, imm);
+ } else {
+ __ shrl(first_reg, imm);
+ }
}
} else {
- Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue);
- if (op->IsShl()) {
- __ shll(first_reg, imm);
- } else if (op->IsShr()) {
- __ sarl(first_reg, imm);
+ DCHECK(first.IsStackSlot()) << first;
+ Address addr(ESP, first.GetStackIndex());
+ if (second.IsRegister()) {
+ Register second_reg = second.AsRegister<Register>();
+ DCHECK_EQ(ECX, second_reg);
+ if (op->IsShl()) {
+ __ shll(addr, second_reg);
+ } else if (op->IsShr()) {
+ __ sarl(addr, second_reg);
+ } else {
+ __ shrl(addr, second_reg);
+ }
} else {
- __ shrl(first_reg, imm);
+ int32_t shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftValue;
+ if (shift == 0) {
+ return;
+ }
+ Immediate imm(shift);
+ if (op->IsShl()) {
+ __ shll(addr, imm);
+ } else if (op->IsShr()) {
+ __ sarl(addr, imm);
+ } else {
+ __ shrl(addr, imm);
+ }
}
}
+
break;
}
case Primitive::kPrimLong: {
- Register second_reg = second.AsRegister<Register>();
- DCHECK_EQ(ECX, second_reg);
- if (op->IsShl()) {
- GenerateShlLong(first, second_reg);
- } else if (op->IsShr()) {
- GenerateShrLong(first, second_reg);
+ if (second.IsRegister()) {
+ Register second_reg = second.AsRegister<Register>();
+ DCHECK_EQ(ECX, second_reg);
+ if (op->IsShl()) {
+ GenerateShlLong(first, second_reg);
+ } else if (op->IsShr()) {
+ GenerateShrLong(first, second_reg);
+ } else {
+ GenerateUShrLong(first, second_reg);
+ }
} else {
- GenerateUShrLong(first, second_reg);
+ // Shift by a constant.
+ int shift = second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftValue;
+ // Nothing to do if the shift is 0, as the input is already the output.
+ if (shift != 0) {
+ if (op->IsShl()) {
+ GenerateShlLong(first, shift);
+ } else if (op->IsShr()) {
+ GenerateShrLong(first, shift);
+ } else {
+ GenerateUShrLong(first, shift);
+ }
+ }
}
break;
}
@@ -2606,6 +2829,26 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) {
}
}
+void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift) {
+ Register low = loc.AsRegisterPairLow<Register>();
+ Register high = loc.AsRegisterPairHigh<Register>();
+ if (shift == 32) {
+ // Shift by 32 is easy. High gets low, and low gets 0.
+ codegen_->EmitParallelMoves(
+ loc.ToLow(), loc.ToHigh(),
+ Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToLow());
+ } else if (shift > 32) {
+ // Low part becomes 0. High part is low part << (shift-32).
+ __ movl(high, low);
+ __ shll(high, Immediate(shift - 32));
+ __ xorl(low, low);
+ } else {
+ // Between 1 and 31.
+ __ shld(high, low, Immediate(shift));
+ __ shll(low, Immediate(shift));
+ }
+}
+
void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register shifter) {
Label done;
__ shld(loc.AsRegisterPairHigh<Register>(), loc.AsRegisterPairLow<Register>(), shifter);
@@ -2617,6 +2860,27 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, Register
__ Bind(&done);
}
+void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, int shift) {
+ Register low = loc.AsRegisterPairLow<Register>();
+ Register high = loc.AsRegisterPairHigh<Register>();
+ if (shift == 32) {
+ // Need to copy the sign.
+ DCHECK_NE(low, high);
+ __ movl(low, high);
+ __ sarl(high, Immediate(31));
+ } else if (shift > 32) {
+ DCHECK_NE(low, high);
+ // High part becomes sign. Low part is shifted by shift - 32.
+ __ movl(low, high);
+ __ sarl(high, Immediate(31));
+ __ shrl(low, Immediate(shift - 32));
+ } else {
+ // Between 1 and 31.
+ __ shrd(low, high, Immediate(shift));
+ __ sarl(high, Immediate(shift));
+ }
+}
+
void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register shifter) {
Label done;
__ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
@@ -2628,6 +2892,26 @@ void InstructionCodeGeneratorX86::GenerateShrLong(const Location& loc, Register
__ Bind(&done);
}
+void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shift) {
+ Register low = loc.AsRegisterPairLow<Register>();
+ Register high = loc.AsRegisterPairHigh<Register>();
+ if (shift == 32) {
+ // Shift by 32 is easy. Low gets high, and high gets 0.
+ codegen_->EmitParallelMoves(
+ loc.ToHigh(), loc.ToLow(),
+ Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToHigh());
+ } else if (shift > 32) {
+ // Low part is high >> (shift - 32). High part becomes 0.
+ __ movl(low, high);
+ __ shrl(low, Immediate(shift - 32));
+ __ xorl(high, high);
+ } else {
+ // Between 1 and 31.
+ __ shrd(low, high, Immediate(shift));
+ __ shrl(high, Immediate(shift));
+ }
+}
+
void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register shifter) {
Label done;
__ shrd(loc.AsRegisterPairLow<Register>(), loc.AsRegisterPairHigh<Register>(), shifter);
@@ -3388,7 +3672,13 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
// Ensure the value is in a byte register.
locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
} else {
- locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
+ bool is_fp_type = (value_type == Primitive::kPrimFloat)
+ || (value_type == Primitive::kPrimDouble);
+ if (is_fp_type) {
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
+ }
}
// Temporary registers for the write barrier.
if (needs_write_barrier) {
@@ -3667,23 +3957,43 @@ X86Assembler* ParallelMoveResolverX86::GetAssembler() const {
}
void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) {
- ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
- Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(temp_reg, Address(ESP, src + stack_offset));
- __ movl(Address(ESP, dst + stack_offset), temp_reg);
+ ScratchRegisterScope possible_scratch(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp = possible_scratch.GetRegister();
+ if (temp == kNoRegister) {
+ // Use the stack.
+ __ pushl(Address(ESP, src));
+ __ popl(Address(ESP, dst));
+ } else {
+ Register temp_reg = static_cast<Register>(temp);
+ __ movl(temp_reg, Address(ESP, src));
+ __ movl(Address(ESP, dst), temp_reg);
+ }
}
void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) {
- ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
- Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(temp_reg, Address(ESP, src + stack_offset));
- __ movl(Address(ESP, dst + stack_offset), temp_reg);
- __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize));
- __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg);
+ ScratchRegisterScope possible_scratch(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp = possible_scratch.GetRegister();
+ if (temp == kNoRegister) {
+ // Use the stack instead.
+ // Push src low word.
+ __ pushl(Address(ESP, src));
+ // Push src high word. Stack offset = 4.
+ __ pushl(Address(ESP, src + 4 /* offset */ + kX86WordSize /* high */));
+
+ // Pop into dst high word. Stack offset = 8.
+ // Pop with ESP address uses the 'after increment' value of ESP.
+ __ popl(Address(ESP, dst + 4 /* offset */ + kX86WordSize /* high */));
+ // Finally dst low word. Stack offset = 4.
+ __ popl(Address(ESP, dst));
+ } else {
+ Register temp_reg = static_cast<Register>(temp);
+ __ movl(temp_reg, Address(ESP, src));
+ __ movl(Address(ESP, dst), temp_reg);
+ __ movl(temp_reg, Address(ESP, src + kX86WordSize));
+ __ movl(Address(ESP, dst + kX86WordSize), temp_reg);
+ }
}
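
Note: the offset bookkeeping in the no-scratch path is easy to misread, so here is a walk-through with src = 0 and dst = 8 (byte offsets from the incoming ESP), sketched as illustrative pseudo-assembly. Recall that an x86 pop with an ESP-relative memory destination computes the address after ESP has already been incremented:

    pushl [ESP + 0]       // push src.lo; ESP -= 4, existing slots slide to +4
    pushl [ESP + 8]       // src.hi: raw 0, +4 for the high word, +4 for the push above
    popl  [ESP + 16]      // ESP += 4 first; one push left, so dst.hi = 8 + 4 + 4
    popl  [ESP + 8]       // ESP += 4 first; ESP is restored, dst.lo at its raw offset
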
void ParallelMoveResolverX86::EmitMove(size_t index) {
@@ -3748,10 +4058,18 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
__ xorps(dest, dest);
} else {
ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
- Register temp = static_cast<Register>(ensure_scratch.GetRegister());
- __ movl(temp, Immediate(value));
- __ movd(dest, temp);
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = ensure_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ // Avoid spilling/restoring a scratch register by using the stack.
+ __ pushl(Immediate(value));
+ __ movss(dest, Address(ESP, 0));
+ __ addl(ESP, Immediate(4));
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, Immediate(value));
+ __ movd(dest, temp);
+ }
}
} else {
DCHECK(destination.IsStackSlot()) << destination;
@@ -3800,42 +4118,96 @@ void ParallelMoveResolverX86::EmitMove(size_t index) {
}
}
-void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
- Register suggested_scratch = reg == EAX ? EBX : EAX;
- ScratchRegisterScope ensure_scratch(
- this, reg, suggested_scratch, codegen_->GetNumberOfCoreRegisters());
+void ParallelMoveResolverX86::Exchange(Register reg1, Register reg2) {
+ // Prefer to avoid xchg as it isn't speedy on smaller processors.
+ ScratchRegisterScope possible_scratch(
+ this, reg1, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = possible_scratch.GetRegister();
+ if (temp_reg == kNoRegister || temp_reg == reg2) {
+ __ pushl(reg1);
+ __ movl(reg1, reg2);
+ __ popl(reg2);
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, reg1);
+ __ movl(reg1, reg2);
+ __ movl(reg2, temp);
+ }
+}
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(static_cast<Register>(ensure_scratch.GetRegister()), Address(ESP, mem + stack_offset));
- __ movl(Address(ESP, mem + stack_offset), reg);
- __ movl(reg, static_cast<Register>(ensure_scratch.GetRegister()));
+void ParallelMoveResolverX86::Exchange(Register reg, int mem) {
+ ScratchRegisterScope possible_scratch(
+ this, reg, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = possible_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ __ pushl(Address(ESP, mem));
+ __ movl(Address(ESP, mem + kX86WordSize), reg);
+ __ popl(reg);
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, Address(ESP, mem));
+ __ movl(Address(ESP, mem), reg);
+ __ movl(reg, temp);
+ }
}
void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) {
- ScratchRegisterScope ensure_scratch(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-
- Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0;
- __ movl(temp_reg, Address(ESP, mem + stack_offset));
- __ movss(Address(ESP, mem + stack_offset), reg);
- __ movd(reg, temp_reg);
+ ScratchRegisterScope possible_scratch(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg = possible_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ __ pushl(Address(ESP, mem));
+ __ movss(Address(ESP, mem + kX86WordSize), reg);
+ __ movss(reg, Address(ESP, 0));
+ __ addl(ESP, Immediate(kX86WordSize));
+ } else {
+ Register temp = static_cast<Register>(temp_reg);
+ __ movl(temp, Address(ESP, mem));
+ __ movss(Address(ESP, mem), reg);
+ __ movd(reg, temp);
+ }
}
void ParallelMoveResolverX86::Exchange(int mem1, int mem2) {
- ScratchRegisterScope ensure_scratch1(
- this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters());
-
- Register suggested_scratch = ensure_scratch1.GetRegister() == EAX ? EBX : EAX;
- ScratchRegisterScope ensure_scratch2(
- this, ensure_scratch1.GetRegister(), suggested_scratch, codegen_->GetNumberOfCoreRegisters());
-
- int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0;
- stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0;
- __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset));
- __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset));
- __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister()));
- __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister()));
+ ScratchRegisterScope possible_scratch1(
+ this, kNoRegister, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg1 = possible_scratch1.GetRegister();
+ if (temp_reg1 == kNoRegister) {
+ // No free registers. Use the stack.
+ __ pushl(Address(ESP, mem1));
+ __ pushl(Address(ESP, mem2 + kX86WordSize));
+ // Pop with ESP address uses the 'after increment' value of ESP.
+ __ popl(Address(ESP, mem1 + kX86WordSize));
+ __ popl(Address(ESP, mem2));
+ } else {
+ // Got the first one. Try for a second.
+ ScratchRegisterScope possible_scratch2(
+ this, temp_reg1, codegen_->GetNumberOfCoreRegisters());
+ int temp_reg2 = possible_scratch2.GetRegister();
+ if (temp_reg2 == kNoRegister) {
+ Register temp = static_cast<Register>(temp_reg1);
+ // Bummer. Only have one free register to use.
+ // Save mem1 on the stack.
+ __ pushl(Address(ESP, mem1));
+
+ // Copy mem2 into mem1.
+ __ movl(temp, Address(ESP, mem2 + kX86WordSize));
+ __ movl(Address(ESP, mem1 + kX86WordSize), temp);
+
+ // Now pop mem1 into mem2.
+ // Pop with ESP address uses the 'after increment' value of ESP.
+ __ popl(Address(ESP, mem2));
+ } else {
+ // Great. We have 2 registers to play with.
+ Register temp1 = static_cast<Register>(temp_reg1);
+ Register temp2 = static_cast<Register>(temp_reg2);
+ DCHECK_NE(temp1, temp2);
+ __ movl(temp1, Address(ESP, mem1));
+ __ movl(temp2, Address(ESP, mem2));
+ __ movl(Address(ESP, mem2), temp1);
+ __ movl(Address(ESP, mem1), temp2);
+ }
+ }
}
void ParallelMoveResolverX86::EmitSwap(size_t index) {
@@ -3844,7 +4216,7 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) {
Location destination = move->GetDestination();
if (source.IsRegister() && destination.IsRegister()) {
- __ xchgl(destination.AsRegister<Register>(), source.AsRegister<Register>());
+ Exchange(destination.AsRegister<Register>(), source.AsRegister<Register>());
} else if (source.IsRegister() && destination.IsStackSlot()) {
Exchange(source.AsRegister<Register>(), destination.GetStackIndex());
} else if (source.IsStackSlot() && destination.IsRegister()) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 0cc3c6533a..8c56e35329 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -106,6 +106,7 @@ class ParallelMoveResolverX86 : public ParallelMoveResolver {
X86Assembler* GetAssembler() const;
private:
+ void Exchange(Register reg1, Register reg2);
void Exchange(Register reg, int mem);
void Exchange(int mem1, int mem2);
void Exchange32(XmmRegister reg, int mem);
@@ -163,11 +164,17 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
void GenerateClassInitializationCheck(SlowPathCodeX86* slow_path, Register class_reg);
void HandleBitwiseOperation(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
+ void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+ void DivByPowerOfTwo(HDiv* instruction);
+ void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateRemFP(HRem *rem);
void HandleShift(HBinaryOperation* instruction);
void GenerateShlLong(const Location& loc, Register shifter);
void GenerateShrLong(const Location& loc, Register shifter);
void GenerateUShrLong(const Location& loc, Register shifter);
+ void GenerateShlLong(const Location& loc, int shift);
+ void GenerateShrLong(const Location& loc, int shift);
+ void GenerateUShrLong(const Location& loc, int shift);
void GenerateMemoryBarrier(MemBarrierKind kind);
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index cdbc7780a8..01b24ea33f 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -16,6 +16,7 @@
#include "code_generator_x86_64.h"
+#include "code_generator_utils.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
@@ -428,7 +429,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetArena(), this),
- isa_features_(isa_features) {
+ isa_features_(isa_features),
+ constant_area_start_(0) {
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
@@ -481,7 +483,15 @@ void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
}
}
+static dwarf::Reg DWARFReg(Register reg) {
+ return dwarf::Reg::X86_64Core(static_cast<int>(reg));
+}
+static dwarf::Reg DWARFReg(FloatRegister reg) {
+ return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
+}
+
void CodeGeneratorX86_64::GenerateFrameEntry() {
+ __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
__ Bind(&frame_entry_label_);
bool skip_overflow_check = IsLeafMethod()
&& !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
@@ -501,17 +511,22 @@ void CodeGeneratorX86_64::GenerateFrameEntry() {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
__ pushq(CpuRegister(reg));
+ __ cfi().AdjustCFAOffset(kX86_64WordSize);
+ __ cfi().RelOffset(DWARFReg(reg), 0);
}
}
- __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
+ int adjust = GetFrameSize() - GetCoreSpillSize();
+ __ subq(CpuRegister(RSP), Immediate(adjust));
+ __ cfi().AdjustCFAOffset(adjust);
uint32_t xmm_spill_location = GetFpuSpillStart();
size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
- __ movsd(Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)),
- XmmRegister(kFpuCalleeSaves[i]));
+ int offset = xmm_spill_location + (xmm_spill_slot_size * i);
+ __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
+ __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
}
}
@@ -526,17 +541,22 @@ void CodeGeneratorX86_64::GenerateFrameExit() {
size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
- __ movsd(XmmRegister(kFpuCalleeSaves[i]),
- Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)));
+ int offset = xmm_spill_location + (xmm_spill_slot_size * i);
+ __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
+ __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
}
}
- __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
+ int adjust = GetFrameSize() - GetCoreSpillSize();
+ __ addq(CpuRegister(RSP), Immediate(adjust));
+ __ cfi().AdjustCFAOffset(-adjust);
for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
Register reg = kCoreCalleeSaves[i];
if (allocated_registers_.ContainsCoreRegister(reg)) {
__ popq(CpuRegister(reg));
+ __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
+ __ cfi().Restore(DWARFReg(reg));
}
}
}
@@ -1123,8 +1143,11 @@ void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret) {
UNUSED(ret);
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
__ ret();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
@@ -1175,8 +1198,11 @@ void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
}
}
+ __ cfi().RememberState();
codegen_->GenerateFrameExit();
__ ret();
+ __ cfi().RestoreState();
+ __ cfi().DefCFAOffset(codegen_->GetFrameSize());
}
Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type) {
@@ -1951,7 +1977,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
case Primitive::kPrimDouble:
case Primitive::kPrimFloat: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2015,12 +2041,30 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
}
case Primitive::kPrimFloat: {
- __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ addss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ addss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ addsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ addsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2048,7 +2092,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2086,12 +2130,30 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
}
case Primitive::kPrimFloat: {
- __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ subss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ subss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ subsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ subsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2124,7 +2186,7 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) {
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2169,13 +2231,31 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
case Primitive::kPrimFloat: {
DCHECK(first.Equals(locations->Out()));
- __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ mulss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ mulss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
DCHECK(first.Equals(locations->Out()));
- __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ mulsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ mulsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
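
Note: with the second input relaxed to Location::Any(), the FP arithmetic instructions consume three operand shapes. Roughly what gets emitted (illustrative pseudo-assembly; the constant case assumes LiteralFloatAddress()/LiteralDoubleAddress() return a RIP-relative reference into a per-method constant area, whose start the new constant_area_start_ field records):

    mulsd xmm0, xmm1               // second.IsFpuRegister()
    mulsd xmm0, [RIP + literal]    // second.IsConstant(): constant-area load
    mulsd xmm0, [RSP + slot]       // second.IsDoubleStackSlot(): use the spill in place
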
@@ -2259,6 +2339,216 @@ void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
__ addq(CpuRegister(RSP), Immediate(2 * elem_size));
}
+void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
+ int64_t imm = Int64FromConstant(second.GetConstant());
+
+ DCHECK(imm == 1 || imm == -1);
+
+ switch (instruction->GetResultType()) {
+ case Primitive::kPrimInt: {
+ if (instruction->IsRem()) {
+ __ xorl(output_register, output_register);
+ } else {
+ __ movl(output_register, input_register);
+ if (imm == -1) {
+ __ negl(output_register);
+ }
+ }
+ break;
+ }
+
+ case Primitive::kPrimLong: {
+ if (instruction->IsRem()) {
+ __ xorq(output_register, output_register);
+ } else {
+ __ movq(output_register, input_register);
+ if (imm == -1) {
+ __ negq(output_register);
+ }
+ }
+ break;
+ }
+
+ default:
+ LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+
+ CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
+
+ int64_t imm = Int64FromConstant(second.GetConstant());
+
+ DCHECK(IsPowerOfTwo(std::abs(imm)));
+
+ CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+ if (instruction->GetResultType() == Primitive::kPrimInt) {
+ __ leal(tmp, Address(numerator, std::abs(imm) - 1));
+ __ testl(numerator, numerator);
+ __ cmov(kGreaterEqual, tmp, numerator);
+ int shift = CTZ(imm);
+ __ sarl(tmp, Immediate(shift));
+
+ if (imm < 0) {
+ __ negl(tmp);
+ }
+
+ __ movl(output_register, tmp);
+ } else {
+ DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+ CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
+
+ __ movq(rdx, Immediate(std::abs(imm) - 1));
+ __ addq(rdx, numerator);
+ __ testq(numerator, numerator);
+ __ cmov(kGreaterEqual, rdx, numerator);
+ int shift = CTZ(imm);
+ __ sarq(rdx, Immediate(shift));
+
+ if (imm < 0) {
+ __ negq(rdx);
+ }
+
+ __ movq(output_register, rdx);
+ }
+}
+
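Aside (not part of the patch): a minimal scalar model of the lea/test/cmov/sar
sequence above; `k` and `negate` are illustrative parameters.

    int32_t DivByPowerOfTwo(int32_t x, int32_t abs_imm, int k, bool negate) {
      // abs_imm == 1 << k. A bare arithmetic shift rounds toward negative
      // infinity, while Java division truncates toward zero, so negative
      // inputs are biased by abs_imm - 1 first (the lea + test + cmov).
      int32_t t = (x >= 0) ? x : x + (abs_imm - 1);
      t >>= k;                 // sarl: arithmetic shift
      return negate ? -t : t;  // negl when the original divisor was negative
    }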
+void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+
+ CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
+ : locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
+ : locations->Out().AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ DCHECK_EQ(RAX, eax.AsRegister());
+ DCHECK_EQ(RDX, edx.AsRegister());
+ if (instruction->IsDiv()) {
+ DCHECK_EQ(RAX, out.AsRegister());
+ } else {
+ DCHECK_EQ(RDX, out.AsRegister());
+ }
+
+ int64_t magic;
+ int shift;
+
+ // TODO: can these branches be written as one?
+ if (instruction->GetResultType() == Primitive::kPrimInt) {
+ int imm = second.GetConstant()->AsIntConstant()->GetValue();
+
+ CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+
+ __ movl(numerator, eax);
+
+ Label no_div;
+ Label end;
+ __ testl(eax, eax);
+ __ j(kNotEqual, &no_div);
+
+ __ xorl(out, out);
+ __ jmp(&end);
+
+ __ Bind(&no_div);
+
+ __ movl(eax, Immediate(magic));
+ __ imull(numerator);
+
+ if (imm > 0 && magic < 0) {
+ __ addl(edx, numerator);
+ } else if (imm < 0 && magic > 0) {
+ __ subl(edx, numerator);
+ }
+
+ if (shift != 0) {
+ __ sarl(edx, Immediate(shift));
+ }
+
+ __ movl(eax, edx);
+ __ shrl(edx, Immediate(31));
+ __ addl(edx, eax);
+
+ if (instruction->IsRem()) {
+ __ movl(eax, numerator);
+ __ imull(edx, Immediate(imm));
+ __ subl(eax, edx);
+ __ movl(edx, eax);
+ } else {
+ __ movl(eax, edx);
+ }
+ __ Bind(&end);
+ } else {
+ int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
+
+ DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+
+ CpuRegister rax = eax;
+ CpuRegister rdx = edx;
+
+ CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
+
+ // Save the numerator.
+ __ movq(numerator, rax);
+
+ // RAX = magic
+ __ movq(rax, Immediate(magic));
+
+ // RDX:RAX = magic * numerator
+ __ imulq(numerator);
+
+ if (imm > 0 && magic < 0) {
+ // RDX += numerator
+ __ addq(rdx, numerator);
+ } else if (imm < 0 && magic > 0) {
+ // RDX -= numerator
+ __ subq(rdx, numerator);
+ }
+
+ // Shift if needed.
+ if (shift != 0) {
+ __ sarq(rdx, Immediate(shift));
+ }
+
+ // RDX += 1 if RDX < 0
+ __ movq(rax, rdx);
+ __ shrq(rdx, Immediate(63));
+ __ addq(rdx, rax);
+
+ if (instruction->IsRem()) {
+ __ movq(rax, numerator);
+
+ if (IsInt<32>(imm)) {
+ __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
+ } else {
+ __ movq(numerator, Immediate(imm));
+ __ imulq(rdx, numerator);
+ }
+
+ __ subq(rax, rdx);
+ __ movq(rdx, rax);
+ } else {
+ __ movq(rax, rdx);
+ }
+ }
+}
+
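Aside (not part of the patch): the sequence above is the classic Hacker's
Delight magic-number division; a 32-bit scalar model, with `magic` and `shift`
as produced by CalculateMagicAndShiftForDivRem.

    int32_t MagicDiv(int32_t n, int32_t imm, int32_t magic, int shift) {
      int64_t product = static_cast<int64_t>(magic) * n;  // imull
      int32_t hi = static_cast<int32_t>(product >> 32);   // high half (edx)
      if (imm > 0 && magic < 0) hi += n;                  // addl edx, numerator
      if (imm < 0 && magic > 0) hi -= n;                  // subl edx, numerator
      hi >>= shift;                                       // sarl
      return hi + (static_cast<uint32_t>(hi) >> 31);      // round toward zero
    }

The remainder is then n - MagicDiv(n, imm, magic, shift) * imm, which is what
the imull/subl tail computes; the int path also short-circuits n == 0 to 0.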
void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
Primitive::Type type = instruction->GetResultType();
@@ -2267,37 +2557,52 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in
bool is_div = instruction->IsDiv();
LocationSummary* locations = instruction->GetLocations();
- CpuRegister out_reg = locations->Out().AsRegister<CpuRegister>();
- CpuRegister second_reg = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ Location second = locations->InAt(1);
DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
- DCHECK_EQ(is_div ? RAX : RDX, out_reg.AsRegister());
+ DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
- SlowPathCodeX86_64* slow_path =
- new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
- out_reg.AsRegister(), type, is_div);
- codegen_->AddSlowPath(slow_path);
+ if (second.IsConstant()) {
+ int64_t imm = Int64FromConstant(second.GetConstant());
- // 0x80000000(00000000)/-1 triggers an arithmetic exception!
- // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000)
- // so it's safe to just use negl instead of more complex comparisons.
- if (type == Primitive::kPrimInt) {
- __ cmpl(second_reg, Immediate(-1));
- __ j(kEqual, slow_path->GetEntryLabel());
- // edx:eax <- sign-extended of eax
- __ cdq();
- // eax = quotient, edx = remainder
- __ idivl(second_reg);
+ if (imm == 0) {
+        // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+ } else if (imm == 1 || imm == -1) {
+ DivRemOneOrMinusOne(instruction);
+ } else if (instruction->IsDiv() && IsPowerOfTwo(std::abs(imm))) {
+ DivByPowerOfTwo(instruction->AsDiv());
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ GenerateDivRemWithAnyConstant(instruction);
+ }
} else {
- __ cmpq(second_reg, Immediate(-1));
- __ j(kEqual, slow_path->GetEntryLabel());
- // rdx:rax <- sign-extended of rax
- __ cqo();
- // rax = quotient, rdx = remainder
- __ idivq(second_reg);
- }
+ SlowPathCodeX86_64* slow_path =
+ new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
+ out.AsRegister(), type, is_div);
+ codegen_->AddSlowPath(slow_path);
- __ Bind(slow_path->GetExitLabel());
+ CpuRegister second_reg = second.AsRegister<CpuRegister>();
+ // 0x80000000(00000000)/-1 triggers an arithmetic exception!
+    // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
+ // so it's safe to just use negl instead of more complex comparisons.
+ if (type == Primitive::kPrimInt) {
+ __ cmpl(second_reg, Immediate(-1));
+ __ j(kEqual, slow_path->GetEntryLabel());
+      // edx:eax <- sign extension of eax
+ __ cdq();
+ // eax = quotient, edx = remainder
+ __ idivl(second_reg);
+ } else {
+ __ cmpq(second_reg, Immediate(-1));
+ __ j(kEqual, slow_path->GetEntryLabel());
+      // rdx:rax <- sign extension of rax
+ __ cqo();
+ // rax = quotient, rdx = remainder
+ __ idivq(second_reg);
+ }
+ __ Bind(slow_path->GetExitLabel());
+ }
}
void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
@@ -2307,17 +2612,23 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
case Primitive::kPrimInt:
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RegisterLocation(RAX));
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
// Intel uses edx:eax as the dividend.
locations->AddTemp(Location::RegisterLocation(RDX));
+      // We need to save the numerator while we tweak RAX and RDX. As the one-operand form of
+      // imul forces its results into RAX and RDX, things are simpler if we request another
+      // temp to hold the numerator.
+ if (div->InputAt(1)->IsConstant()) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
break;
}
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::Any());
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -2342,12 +2653,30 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
}
case Primitive::kPrimFloat: {
- __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ divss(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralFloatAddress(second.GetConstant()->AsFloatConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsStackSlot());
+ __ divss(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
case Primitive::kPrimDouble: {
- __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ if (second.IsFpuRegister()) {
+ __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
+ } else if (second.IsConstant()) {
+ __ divsd(first.AsFpuRegister<XmmRegister>(),
+ codegen_->LiteralDoubleAddress(second.GetConstant()->AsDoubleConstant()->GetValue()));
+ } else {
+ DCHECK(second.IsDoubleStackSlot());
+ __ divsd(first.AsFpuRegister<XmmRegister>(),
+ Address(CpuRegister(RSP), second.GetStackIndex()));
+ }
break;
}
@@ -2365,9 +2694,15 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) {
case Primitive::kPrimInt:
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RegisterLocation(RAX));
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
// Intel uses rdx:rax as the dividend and puts the remainder in rdx
locations->SetOut(Location::RegisterLocation(RDX));
+      // We need to save the numerator while we tweak RAX and RDX. As the one-operand form of
+      // imul forces its results into RAX and RDX, things are simpler if we request another
+      // temp to hold the numerator.
+ if (rem->InputAt(1)->IsConstant()) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
break;
}
@@ -3486,15 +3821,27 @@ void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
ScratchRegisterScope ensure_scratch(
- this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
+ this, TMP, codegen_->GetNumberOfCoreRegisters());
- int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
- __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
- __ movq(CpuRegister(ensure_scratch.GetRegister()),
- Address(CpuRegister(RSP), mem2 + stack_offset));
- __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
- __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
- CpuRegister(ensure_scratch.GetRegister()));
+ int temp_reg = ensure_scratch.GetRegister();
+ if (temp_reg == kNoRegister) {
+ // Use the stack as a temporary.
+ // Save mem1 on the stack.
+ __ pushq(Address(CpuRegister(RSP), mem1));
+
+ // Copy mem2 into mem1.
+ __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem2 + kX86_64WordSize));
+ __ movq(Address(CpuRegister(RSP), mem1 + kX86_64WordSize), CpuRegister(TMP));
+
+ // Now pop mem1 into mem2.
+ __ popq(Address(CpuRegister(RSP), mem2));
+ } else {
+ CpuRegister temp = CpuRegister(temp_reg);
+ __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1));
+ __ movq(temp, Address(CpuRegister(RSP), mem2));
+ __ movq(Address(CpuRegister(RSP), mem2), CpuRegister(TMP));
+ __ movq(Address(CpuRegister(RSP), mem1), temp);
+ }
}
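Aside (not part of the patch): in the no-scratch fallback above, pushq moves
RSP down by kX86_64WordSize, so both RSP-relative operands are rebased while
the saved value is live; popq needs no adjustment because a POP with a memory
operand computes its effective address after RSP has already been incremented.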
void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
@@ -3503,6 +3850,13 @@ void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
__ movd(reg, CpuRegister(TMP));
}
+void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
+ // Prefer to avoid xchg as it isn't speedy on smaller processors.
+ __ movq(CpuRegister(TMP), reg1);
+ __ movq(reg1, reg2);
+ __ movq(reg2, CpuRegister(TMP));
+}
+
void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
__ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
__ movsd(Address(CpuRegister(RSP), mem), reg);
@@ -3515,7 +3869,7 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
Location destination = move->GetDestination();
if (source.IsRegister() && destination.IsRegister()) {
- __ xchgq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
+ Exchange64(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
} else if (source.IsRegister() && destination.IsStackSlot()) {
Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
} else if (source.IsStackSlot() && destination.IsRegister()) {
@@ -3880,5 +4234,66 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction) {
LOG(FATAL) << "Unreachable";
}
+void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
+ // Generate the constant area if needed.
+ X86_64Assembler* assembler = GetAssembler();
+ if (!assembler->IsConstantAreaEmpty()) {
+ // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
+ // byte values. If used for vectors at a later time, this will need to be
+ // updated to 16 bytes with the appropriate offset.
+ assembler->Align(4, 0);
+ constant_area_start_ = assembler->CodeSize();
+ assembler->AddConstantArea();
+ }
+
+ // And finish up.
+ CodeGenerator::Finalize(allocator);
+}
+
+/**
+ * Class to handle late fixup of offsets into the constant area.
+ */
+class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocMisc> {
+ public:
+ RIPFixup(const CodeGeneratorX86_64& codegen, int offset)
+ : codegen_(codegen), offset_into_constant_area_(offset) {}
+
+ private:
+ void Process(const MemoryRegion& region, int pos) OVERRIDE {
+ // Patch the correct offset for the instruction. We use the address of the
+ // 'next' instruction, which is 'pos' (patch the 4 bytes before).
+ int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_;
+ int relative_position = constant_offset - pos;
+
+ // Patch in the right value.
+ region.StoreUnaligned<int32_t>(pos - 4, relative_position);
+ }
+
+ const CodeGeneratorX86_64& codegen_;
+
+ // Location in constant area that the fixup refers to.
+ int offset_into_constant_area_;
+};
+
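Aside (not part of the patch): x86-64 encodes a RIP-relative disp32 relative
to the end of the current instruction, which is exactly what `pos` marks here;
the patched value is, in sketch form:

    // Stored into the 4 bytes ending at 'pos'.
    int32_t RipDisplacement(int constant_area_start, int offset, int pos) {
      return constant_area_start + offset - pos;
    }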
+Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
+ return Address::RIP(fixup);
+}
+
+Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
+ AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
+ return Address::RIP(fixup);
+}
+
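Aside (not part of the patch): these helpers make the constant area usable as
an ordinary memory operand, as the mulsd/divss call sites above show; a hedged
usage sketch inside a visitor:

    // The RIPFixup defers the disp32 until Finalize() places the constant
    // area, so this works even though the literal's address is not yet known.
    __ mulsd(first.AsFpuRegister<XmmRegister>(),
             codegen_->LiteralDoubleAddress(2.0));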
} // namespace x86_64
} // namespace art
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 375c0b03b9..61bf6ac71d 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -118,6 +118,7 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolver {
void Exchange32(CpuRegister reg, int mem);
void Exchange32(XmmRegister reg, int mem);
void Exchange32(int mem1, int mem2);
+ void Exchange64(CpuRegister reg1, CpuRegister reg2);
void Exchange64(CpuRegister reg, int mem);
void Exchange64(XmmRegister reg, int mem);
void Exchange64(int mem1, int mem2);
@@ -173,6 +174,9 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
void GenerateClassInitializationCheck(SlowPathCodeX86_64* slow_path, CpuRegister class_reg);
void HandleBitwiseOperation(HBinaryOperation* operation);
void GenerateRemFP(HRem *rem);
+ void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+ void DivByPowerOfTwo(HDiv* instruction);
+ void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleShift(HBinaryOperation* operation);
void GenerateMemoryBarrier(MemBarrierKind kind);
@@ -243,6 +247,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
Location AllocateFreeRegister(Primitive::Type type) const OVERRIDE;
void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+ void Finalize(CodeAllocator* allocator) OVERRIDE;
InstructionSet GetInstructionSet() const OVERRIDE {
return InstructionSet::kX86_64;
@@ -274,6 +279,15 @@ class CodeGeneratorX86_64 : public CodeGenerator {
return isa_features_;
}
+ int ConstantAreaStart() const {
+ return constant_area_start_;
+ }
+
+ Address LiteralDoubleAddress(double v);
+ Address LiteralFloatAddress(float v);
+ Address LiteralInt32Address(int32_t v);
+ Address LiteralInt64Address(int64_t v);
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
@@ -284,6 +298,10 @@ class CodeGeneratorX86_64 : public CodeGenerator {
X86_64Assembler assembler_;
const X86_64InstructionSetFeatures& isa_features_;
+ // Offset to the start of the constant area in the assembled code.
+ // Used for fixups to the constant area.
+ int constant_area_start_;
+
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
};
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 56ec8a7ed1..afbc490150 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -24,9 +24,21 @@ namespace art {
class InstructionSimplifierVisitor : public HGraphVisitor {
public:
InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats)
- : HGraphVisitor(graph), stats_(stats) {}
+ : HGraphVisitor(graph),
+ stats_(stats) {}
+
+ void Run();
private:
+ void RecordSimplification() {
+ simplification_occurred_ = true;
+ simplifications_at_current_position_++;
+ if (stats_) {
+ stats_->RecordStat(kInstructionSimplifications);
+ }
+ }
+
+ bool TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop);
void VisitShift(HBinaryOperation* shift);
void VisitSuspendCheck(HSuspendCheck* check) OVERRIDE;
@@ -40,6 +52,8 @@ class InstructionSimplifierVisitor : public HGraphVisitor {
void VisitAnd(HAnd* instruction) OVERRIDE;
void VisitDiv(HDiv* instruction) OVERRIDE;
void VisitMul(HMul* instruction) OVERRIDE;
+ void VisitNeg(HNeg* instruction) OVERRIDE;
+ void VisitNot(HNot* instruction) OVERRIDE;
void VisitOr(HOr* instruction) OVERRIDE;
void VisitShl(HShl* instruction) OVERRIDE;
void VisitShr(HShr* instruction) OVERRIDE;
@@ -48,11 +62,38 @@ class InstructionSimplifierVisitor : public HGraphVisitor {
void VisitXor(HXor* instruction) OVERRIDE;
OptimizingCompilerStats* stats_;
+ bool simplification_occurred_ = false;
+ int simplifications_at_current_position_ = 0;
+ // We ensure we do not loop infinitely. The value is a finger in the air guess
+ // that should allow enough simplification.
+ static constexpr int kMaxSamePositionSimplifications = 10;
};
void InstructionSimplifier::Run() {
InstructionSimplifierVisitor visitor(graph_, stats_);
- visitor.VisitInsertionOrder();
+ visitor.Run();
+}
+
+void InstructionSimplifierVisitor::Run() {
+ for (HReversePostOrderIterator it(*GetGraph()); !it.Done();) {
+ // The simplification of an instruction to another instruction may yield
+ // possibilities for other simplifications. So although we perform a reverse
+ // post order visit, we sometimes need to revisit an instruction index.
+ simplification_occurred_ = false;
+ VisitBasicBlock(it.Current());
+ if (simplification_occurred_ &&
+ (simplifications_at_current_position_ < kMaxSamePositionSimplifications)) {
+ // New simplifications may be applicable to the instruction at the
+ // current index, so don't advance the iterator.
+ continue;
+ }
+ if (simplifications_at_current_position_ >= kMaxSamePositionSimplifications) {
+ LOG(WARNING) << "Too many simplifications (" << simplifications_at_current_position_
+ << ") occurred at the current position.";
+ }
+ simplifications_at_current_position_ = 0;
+ it.Advance();
+ }
}
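Aside (not part of the patch): revisiting the block without advancing pays off
when one rewrite exposes another; both rules below are in this patch:

    // u = SUB(0, NEG(x))   =>   u = NEG(NEG(x))   =>   u = x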
namespace {
@@ -63,6 +104,35 @@ bool AreAllBitsSet(HConstant* constant) {
} // namespace
+// Returns true if the code was simplified to use only one negation operation
+// after the binary operation instead of one on each of the inputs.
+bool InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop(HBinaryOperation* binop) {
+ DCHECK(binop->IsAdd() || binop->IsSub());
+ DCHECK(binop->GetLeft()->IsNeg() && binop->GetRight()->IsNeg());
+ HNeg* left_neg = binop->GetLeft()->AsNeg();
+ HNeg* right_neg = binop->GetRight()->AsNeg();
+ if (!left_neg->HasOnlyOneNonEnvironmentUse() ||
+ !right_neg->HasOnlyOneNonEnvironmentUse()) {
+ return false;
+ }
+ // Replace code looking like
+ // NEG tmp1, a
+ // NEG tmp2, b
+ // ADD dst, tmp1, tmp2
+ // with
+ // ADD tmp, a, b
+ // NEG dst, tmp
+ binop->ReplaceInput(left_neg->GetInput(), 0);
+ binop->ReplaceInput(right_neg->GetInput(), 1);
+ left_neg->GetBlock()->RemoveInstruction(left_neg);
+ right_neg->GetBlock()->RemoveInstruction(right_neg);
+ HNeg* neg = new (GetGraph()->GetArena()) HNeg(binop->GetType(), binop);
+ binop->GetBlock()->InsertInstructionBefore(neg, binop->GetNext());
+ binop->ReplaceWithExceptInReplacementAtIndex(neg, 0);
+ RecordSimplification();
+ return true;
+}
+
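Aside (not part of the patch): the rewrite is justified by the
two's-complement identities (-a) + (-b) == -(a + b) and
(-a) - (-b) == -(a - b), so a single NEG after the binop replaces the two
NEGs feeding it.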
void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
HConstant* input_cst = instruction->GetConstantRight();
@@ -182,6 +252,36 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
// src
instruction->ReplaceWith(input_other);
instruction->GetBlock()->RemoveInstruction(instruction);
+ return;
+ }
+
+ HInstruction* left = instruction->GetLeft();
+ HInstruction* right = instruction->GetRight();
+ bool left_is_neg = left->IsNeg();
+ bool right_is_neg = right->IsNeg();
+
+ if (left_is_neg && right_is_neg) {
+ if (TryMoveNegOnInputsAfterBinop(instruction)) {
+ return;
+ }
+ }
+
+ HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg();
+ if ((left_is_neg ^ right_is_neg) && neg->HasOnlyOneNonEnvironmentUse()) {
+ // Replace code looking like
+ // NEG tmp, b
+ // ADD dst, a, tmp
+ // with
+ // SUB dst, a, b
+ // We do not perform the optimization if the input negation has environment
+ // uses or multiple non-environment uses as it could lead to worse code. In
+ // particular, we do not want the live range of `b` to be extended if we are
+ // not sure the initial 'NEG' instruction can be removed.
+ HInstruction* other = left_is_neg ? right : left;
+ HSub* sub = new(GetGraph()->GetArena()) HSub(instruction->GetType(), other, neg->GetInput());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub);
+ RecordSimplification();
+ neg->GetBlock()->RemoveInstruction(neg);
}
}
@@ -201,7 +301,7 @@ void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) {
// We assume that GVN has run before, so we only perform a pointer comparison.
// If for some reason the values are equal but the pointers are different, we
- // are still correct and only miss an optimisation opportunity.
+ // are still correct and only miss an optimization opportunity.
if (instruction->GetLeft() == instruction->GetRight()) {
// Replace code looking like
// AND dst, src, src
@@ -235,6 +335,7 @@ void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) {
// NEG dst, src
instruction->GetBlock()->ReplaceAndRemoveInstructionWith(
instruction, (new (GetGraph()->GetArena()) HNeg(type, input_other)));
+ RecordSimplification();
}
}
@@ -267,6 +368,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) {
// NEG dst, src
HNeg* neg = new (allocator) HNeg(type, input_other);
block->ReplaceAndRemoveInstructionWith(instruction, neg);
+ RecordSimplification();
return;
}
@@ -280,6 +382,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) {
// The 'int' and 'long' cases are handled below.
block->ReplaceAndRemoveInstructionWith(instruction,
new (allocator) HAdd(type, input_other, input_other));
+ RecordSimplification();
return;
}
@@ -295,7 +398,72 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) {
HIntConstant* shift = GetGraph()->GetIntConstant(WhichPowerOf2(factor));
HShl* shl = new(allocator) HShl(type, input_other, shift);
block->ReplaceAndRemoveInstructionWith(instruction, shl);
+ RecordSimplification();
+ }
+ }
+}
+
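Aside (not part of the patch): for the power-of-two case, e.g. factor == 8:

    // MUL dst, src, 8   =>   SHL dst, src, 3   (3 == WhichPowerOf2(8))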
+void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) {
+ HInstruction* input = instruction->GetInput();
+ if (input->IsNeg()) {
+ // Replace code looking like
+ // NEG tmp, src
+ // NEG dst, tmp
+ // with
+ // src
+ HNeg* previous_neg = input->AsNeg();
+ instruction->ReplaceWith(previous_neg->GetInput());
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ // We perform the optimization even if the input negation has environment
+ // uses since it allows removing the current instruction. But we only delete
+    // the input negation if it does not have any uses left.
+ if (!previous_neg->HasUses()) {
+ previous_neg->GetBlock()->RemoveInstruction(previous_neg);
+ }
+ RecordSimplification();
+ return;
+ }
+
+ if (input->IsSub() && input->HasOnlyOneNonEnvironmentUse()) {
+ // Replace code looking like
+ // SUB tmp, a, b
+ // NEG dst, tmp
+ // with
+ // SUB dst, b, a
+ // We do not perform the optimization if the input subtraction has
+ // environment uses or multiple non-environment uses as it could lead to
+ // worse code. In particular, we do not want the live ranges of `a` and `b`
+ // to be extended if we are not sure the initial 'SUB' instruction can be
+ // removed.
+ HSub* sub = input->AsSub();
+ HSub* new_sub =
+ new (GetGraph()->GetArena()) HSub(instruction->GetType(), sub->GetRight(), sub->GetLeft());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, new_sub);
+ if (!sub->HasUses()) {
+ sub->GetBlock()->RemoveInstruction(sub);
+ }
+ RecordSimplification();
+ }
+}
+
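Aside (not part of the patch): NEG(SUB(a, b)) == SUB(b, a) is exact for
two's-complement integers, since -(a - b) == b - a modulo 2^n; the rewrite
costs nothing, it only swaps the operands.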
+void InstructionSimplifierVisitor::VisitNot(HNot* instruction) {
+ HInstruction* input = instruction->GetInput();
+ if (input->IsNot()) {
+ // Replace code looking like
+ // NOT tmp, src
+ // NOT dst, tmp
+ // with
+ // src
+    // We perform the optimization even if the input 'NOT' has environment
+    // uses since it allows removing the current instruction. But we only
+    // delete the input 'NOT' if it does not have any uses left.
+ HNot* previous_not = input->AsNot();
+ instruction->ReplaceWith(previous_not->GetInput());
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ if (!previous_not->HasUses()) {
+ previous_not->GetBlock()->RemoveInstruction(previous_not);
}
+ RecordSimplification();
}
}
@@ -315,7 +483,7 @@ void InstructionSimplifierVisitor::VisitOr(HOr* instruction) {
// We assume that GVN has run before, so we only perform a pointer comparison.
// If for some reason the values are equal but the pointers are different, we
- // are still correct and only miss an optimisation opportunity.
+ // are still correct and only miss an optimization opportunity.
if (instruction->GetLeft() == instruction->GetRight()) {
// Replace code looking like
// OR dst, src, src
@@ -356,20 +524,61 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) {
HBasicBlock* block = instruction->GetBlock();
ArenaAllocator* allocator = GetGraph()->GetArena();
- if (instruction->GetLeft()->IsConstant()) {
- int64_t left = Int64FromConstant(instruction->GetLeft()->AsConstant());
- if (left == 0) {
+ HInstruction* left = instruction->GetLeft();
+ HInstruction* right = instruction->GetRight();
+ if (left->IsConstant()) {
+ if (Int64FromConstant(left->AsConstant()) == 0) {
// Replace code looking like
// SUB dst, 0, src
// with
// NEG dst, src
- // Note that we cannot optimise `0.0 - x` to `-x` for floating-point. When
+ // Note that we cannot optimize `0.0 - x` to `-x` for floating-point. When
      // `x` is `0.0`, the former expression yields `0.0`, while the latter
// yields `-0.0`.
- HNeg* neg = new (allocator) HNeg(type, instruction->GetRight());
+ HNeg* neg = new (allocator) HNeg(type, right);
block->ReplaceAndRemoveInstructionWith(instruction, neg);
+ RecordSimplification();
+ return;
+ }
+ }
+
+ if (left->IsNeg() && right->IsNeg()) {
+ if (TryMoveNegOnInputsAfterBinop(instruction)) {
+ return;
}
}
+
+ if (right->IsNeg() && right->HasOnlyOneNonEnvironmentUse()) {
+ // Replace code looking like
+ // NEG tmp, b
+ // SUB dst, a, tmp
+ // with
+ // ADD dst, a, b
+ HAdd* add = new(GetGraph()->GetArena()) HAdd(type, left, right->AsNeg()->GetInput());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add);
+ RecordSimplification();
+ right->GetBlock()->RemoveInstruction(right);
+ return;
+ }
+
+ if (left->IsNeg() && left->HasOnlyOneNonEnvironmentUse()) {
+ // Replace code looking like
+ // NEG tmp, a
+ // SUB dst, tmp, b
+ // with
+ // ADD tmp, a, b
+ // NEG dst, tmp
+ // The second version is not intrinsically better, but enables more
+ // transformations.
+ HAdd* add = new(GetGraph()->GetArena()) HAdd(type, left->AsNeg()->GetInput(), right);
+ instruction->GetBlock()->InsertInstructionBefore(add, instruction);
+ HNeg* neg = new (GetGraph()->GetArena()) HNeg(instruction->GetType(), add);
+ instruction->GetBlock()->InsertInstructionBefore(neg, instruction);
+ instruction->ReplaceWith(neg);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ RecordSimplification();
+ left->GetBlock()->RemoveInstruction(left);
+ }
}
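Aside (not part of the patch): the SUB rewrites above rest on the exact
two's-complement identities a - (-b) == a + b and (-a) - b == -(a + b).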
void InstructionSimplifierVisitor::VisitUShr(HUShr* instruction) {
@@ -397,6 +606,7 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) {
// NOT dst, src
HNot* bitwise_not = new (GetGraph()->GetArena()) HNot(instruction->GetType(), input_other);
instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, bitwise_not);
+ RecordSimplification();
return;
}
}
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 5122a00d92..cbf94f0f81 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -298,25 +298,27 @@ static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke)
// TODO: Allow x86 to work with memory. This requires assembler support, see below.
// locations->SetInAt(0, Location::Any()); // X86 can work on memory directly.
locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
- locations->AddTemp(Location::RequiresFpuRegister()); // FP version of above.
+ locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask.
}
-static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
+static void MathAbsFP(LocationSummary* locations,
+ bool is64bit,
+ X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen) {
Location output = locations->Out();
- CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
if (output.IsFpuRegister()) {
// In-register
- XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
+ XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ // TODO: Can mask directly with constant area using pand if we can guarantee
+ // that the literal is aligned on a 16 byte boundary. This will avoid a
+ // temporary.
if (is64bit) {
- __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
- __ movd(xmm_temp, cpu_temp);
+ __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
__ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
} else {
- __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
- __ movd(xmm_temp, cpu_temp);
+ __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
__ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
}
} else {
@@ -341,7 +343,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), true, GetAssembler());
+ MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
@@ -349,7 +351,7 @@ void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), false, GetAssembler());
+ MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}
static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
@@ -399,8 +401,11 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}
-static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
- X86_64Assembler* assembler) {
+static void GenMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ bool is_double,
+ X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen) {
Location op1_loc = locations->InAt(0);
Location op2_loc = locations->InAt(1);
Location out_loc = locations->Out();
@@ -427,7 +432,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
//
// This removes one jmp, but needs to copy one input (op1) to out.
//
- // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+ // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
@@ -461,14 +466,11 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
// NaN handling.
__ Bind(&nan);
- CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
- // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
if (is_double) {
- __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
+ __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
} else {
- __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
+ __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
}
- __ movd(out, cpu_temp, is_double);
__ jmp(&done);
// out := op2;
@@ -483,7 +485,7 @@ static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
__ Bind(&done);
}
-static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
kIntrinsified);
@@ -492,39 +494,38 @@ static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invo
// The following is sub-optimal, but all we can do for now. It would be fine to also accept
// the second input to be the output (we can simply swap inputs).
locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister()); // Immediate constant.
}
void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}
void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPPlusTempLocations(arena_, invoke);
+ CreateFPFPToFP(arena_, invoke);
}
void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
+ GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}
static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index f764eb421f..5f50494482 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1177,6 +1177,9 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
bool HasUses() const { return !uses_.IsEmpty() || !env_uses_.IsEmpty(); }
bool HasEnvironmentUses() const { return !env_uses_.IsEmpty(); }
bool HasNonEnvironmentUses() const { return !uses_.IsEmpty(); }
+ bool HasOnlyOneNonEnvironmentUse() const {
+ return !HasEnvironmentUses() && GetUses().HasOnlyOneUse();
+ }
// Does this instruction strictly dominate `other_instruction`?
// Returns false if this instruction and `other_instruction` are the same.
@@ -1214,6 +1217,13 @@ class HInstruction : public ArenaObject<kArenaAllocMisc> {
void ReplaceWith(HInstruction* instruction);
void ReplaceInput(HInstruction* replacement, size_t index);
+ // This is almost the same as doing `ReplaceWith()`. But in this helper, the
+ // uses of this instruction by `other` are *not* updated.
+ void ReplaceWithExceptInReplacementAtIndex(HInstruction* other, size_t use_index) {
+ ReplaceWith(other);
+ other->ReplaceInput(this, use_index);
+ }
+
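Aside (not part of the patch): TryMoveNegOnInputsAfterBinop is the caller
introduced by this patch; every use of the binop is redirected to the new NEG
except the NEG's own input operand (use_index 0), which must keep pointing at
the binop to avoid a cycle.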
// Move `this` instruction before `cursor`.
void MoveBefore(HInstruction* cursor);
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
new file mode 100644
index 0000000000..6d986ba7d3
--- /dev/null
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <vector>
+
+#include "arch/instruction_set.h"
+#include "cfi_test.h"
+#include "gtest/gtest.h"
+#include "optimizing/code_generator.h"
+#include "utils/assembler.h"
+
+#include "optimizing/optimizing_cfi_test_expected.inc"
+
+namespace art {
+
+// Run the tests only on host.
+#ifndef HAVE_ANDROID_OS
+
+class OptimizingCFITest : public CFITest {
+ public:
+ // Enable this flag to generate the expected outputs.
+ static constexpr bool kGenerateExpected = false;
+
+ void TestImpl(InstructionSet isa, const char* isa_str,
+ const std::vector<uint8_t>& expected_asm,
+ const std::vector<uint8_t>& expected_cfi) {
+ // Setup simple context.
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ CompilerOptions opts;
+ std::unique_ptr<const InstructionSetFeatures> isa_features;
+ std::string error;
+ isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
+ HGraph graph(&allocator);
+ // Generate simple frame with some spills.
+ std::unique_ptr<CodeGenerator> code_gen(
+ CodeGenerator::Create(&graph, isa, *isa_features.get(), opts));
+ const int frame_size = 64;
+ int core_reg = 0;
+ int fp_reg = 0;
+ for (int i = 0; i < 2; i++) { // Two registers of each kind.
+ for (; core_reg < 32; core_reg++) {
+ if (code_gen->IsCoreCalleeSaveRegister(core_reg)) {
+ auto location = Location::RegisterLocation(core_reg);
+ code_gen->AddAllocatedRegister(location);
+ core_reg++;
+ break;
+ }
+ }
+ for (; fp_reg < 32; fp_reg++) {
+ if (code_gen->IsFloatingPointCalleeSaveRegister(fp_reg)) {
+ auto location = Location::FpuRegisterLocation(fp_reg);
+ code_gen->AddAllocatedRegister(location);
+ fp_reg++;
+ break;
+ }
+ }
+ }
+ code_gen->ComputeSpillMask();
+ code_gen->SetFrameSize(frame_size);
+ code_gen->GenerateFrameEntry();
+ code_gen->GetInstructionVisitor()->VisitReturnVoid(new (&allocator) HReturnVoid());
+ // Get the outputs.
+ InternalCodeAllocator code_allocator;
+ code_gen->Finalize(&code_allocator);
+ const std::vector<uint8_t>& actual_asm = code_allocator.GetMemory();
+ Assembler* opt_asm = code_gen->GetAssembler();
+ const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data());
+
+ if (kGenerateExpected) {
+ GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi);
+ } else {
+ EXPECT_EQ(expected_asm, actual_asm);
+ EXPECT_EQ(expected_cfi, actual_cfi);
+ }
+ }
+
+ private:
+ class InternalCodeAllocator : public CodeAllocator {
+ public:
+ InternalCodeAllocator() {}
+
+ virtual uint8_t* Allocate(size_t size) {
+ memory_.resize(size);
+ return memory_.data();
+ }
+
+ const std::vector<uint8_t>& GetMemory() { return memory_; }
+
+ private:
+ std::vector<uint8_t> memory_;
+
+ DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
+ };
+};
+
+#define TEST_ISA(isa) \
+ TEST_F(OptimizingCFITest, isa) { \
+ std::vector<uint8_t> expected_asm(expected_asm_##isa, \
+ expected_asm_##isa + arraysize(expected_asm_##isa)); \
+ std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \
+ expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
+ TestImpl(isa, #isa, expected_asm, expected_cfi); \
+ }
+
+TEST_ISA(kThumb2)
+TEST_ISA(kArm64)
+TEST_ISA(kX86)
+TEST_ISA(kX86_64)
+
+#endif // HAVE_ANDROID_OS
+
+} // namespace art
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
new file mode 100644
index 0000000000..2125f6eb01
--- /dev/null
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -0,0 +1,141 @@
+static constexpr uint8_t expected_asm_kThumb2[] = {
+ 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0,
+ 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD,
+};
+static constexpr uint8_t expected_cfi_kThumb2[] = {
+ 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
+ 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42,
+ 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, 0x0E,
+ 0x40,
+};
+// 0x00000000: push {r5, r6, lr}
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r5 at cfa-12
+// 0x00000002: .cfi_offset: r6 at cfa-8
+// 0x00000002: .cfi_offset: r14 at cfa-4
+// 0x00000002: vpush.f32 {s16-s17}
+// 0x00000006: .cfi_def_cfa_offset: 20
+// 0x00000006: .cfi_offset_extended: r80 at cfa-20
+// 0x00000006: .cfi_offset_extended: r81 at cfa-16
+// 0x00000006: sub sp, sp, #44
+// 0x00000008: .cfi_def_cfa_offset: 64
+// 0x00000008: str r0, [sp, #0]
+// 0x0000000a: .cfi_remember_state
+// 0x0000000a: add sp, sp, #44
+// 0x0000000c: .cfi_def_cfa_offset: 20
+// 0x0000000c: vpop.f32 {s16-s17}
+// 0x00000010: .cfi_def_cfa_offset: 12
+// 0x00000010: .cfi_restore_extended: r80
+// 0x00000010: .cfi_restore_extended: r81
+// 0x00000010: pop {r5, r6, pc}
+// 0x00000012: .cfi_restore_state
+// 0x00000012: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kArm64[] = {
+ 0xE0, 0x0F, 0x1C, 0xB8, 0xF3, 0xD3, 0x02, 0xA9, 0xFE, 0x1F, 0x00, 0xF9,
+ 0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF3, 0xD3, 0x42, 0xA9,
+ 0xFE, 0x1F, 0x40, 0xF9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
+};
+static constexpr uint8_t expected_cfi_kArm64[] = {
+ 0x44, 0x0E, 0x40, 0x44, 0x93, 0x06, 0x94, 0x04, 0x44, 0x9E, 0x02, 0x44,
+ 0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
+ 0x44, 0xD3, 0xD4, 0x44, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: str w0, [sp, #-64]!
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: stp x19, x20, [sp, #40]
+// 0x00000008: .cfi_offset: r19 at cfa-24
+// 0x00000008: .cfi_offset: r20 at cfa-16
+// 0x00000008: str lr, [sp, #56]
+// 0x0000000c: .cfi_offset: r30 at cfa-8
+// 0x0000000c: stp d8, d9, [sp, #24]
+// 0x00000010: .cfi_offset_extended: r72 at cfa-40
+// 0x00000010: .cfi_offset_extended: r73 at cfa-32
+// 0x00000010: .cfi_remember_state
+// 0x00000010: ldp d8, d9, [sp, #24]
+// 0x00000014: .cfi_restore_extended: r72
+// 0x00000014: .cfi_restore_extended: r73
+// 0x00000014: ldp x19, x20, [sp, #40]
+// 0x00000018: .cfi_restore: r19
+// 0x00000018: .cfi_restore: r20
+// 0x00000018: ldr lr, [sp, #56]
+// 0x0000001c: .cfi_restore: r30
+// 0x0000001c: add sp, sp, #0x40 (64)
+// 0x00000020: .cfi_def_cfa_offset: 0
+// 0x00000020: ret
+// 0x00000024: .cfi_restore_state
+// 0x00000024: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kX86[] = {
+ 0x56, 0x55, 0x83, 0xEC, 0x34, 0x89, 0x04, 0x24, 0x83, 0xC4, 0x34, 0x5D,
+ 0x5E, 0xC3,
+};
+static constexpr uint8_t expected_cfi_kX86[] = {
+ 0x41, 0x0E, 0x08, 0x86, 0x02, 0x41, 0x0E, 0x0C, 0x85, 0x03, 0x43, 0x0E,
+ 0x40, 0x43, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E,
+ 0x04, 0xC6, 0x41, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: push esi
+// 0x00000001: .cfi_def_cfa_offset: 8
+// 0x00000001: .cfi_offset: r6 at cfa-8
+// 0x00000001: push ebp
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r5 at cfa-12
+// 0x00000002: sub esp, 52
+// 0x00000005: .cfi_def_cfa_offset: 64
+// 0x00000005: mov [esp], eax
+// 0x00000008: .cfi_remember_state
+// 0x00000008: add esp, 52
+// 0x0000000b: .cfi_def_cfa_offset: 12
+// 0x0000000b: pop ebp
+// 0x0000000c: .cfi_def_cfa_offset: 8
+// 0x0000000c: .cfi_restore: r5
+// 0x0000000c: pop esi
+// 0x0000000d: .cfi_def_cfa_offset: 4
+// 0x0000000d: .cfi_restore: r6
+// 0x0000000d: ret
+// 0x0000000e: .cfi_restore_state
+// 0x0000000e: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kX86_64[] = {
+ 0x55, 0x53, 0x48, 0x83, 0xEC, 0x28, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24,
+ 0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x89, 0x3C, 0x24, 0xF2,
+ 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C, 0x24,
+ 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3,
+};
+static constexpr uint8_t expected_cfi_kX86_64[] = {
+ 0x41, 0x0E, 0x10, 0x86, 0x04, 0x41, 0x0E, 0x18, 0x83, 0x06, 0x44, 0x0E,
+ 0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x43, 0x0A, 0x47, 0xDD, 0x47,
+ 0xDE, 0x44, 0x0E, 0x18, 0x41, 0x0E, 0x10, 0xC3, 0x41, 0x0E, 0x08, 0xC6,
+ 0x41, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: push rbp
+// 0x00000001: .cfi_def_cfa_offset: 16
+// 0x00000001: .cfi_offset: r6 at cfa-16
+// 0x00000001: push rbx
+// 0x00000002: .cfi_def_cfa_offset: 24
+// 0x00000002: .cfi_offset: r3 at cfa-24
+// 0x00000002: subq rsp, 40
+// 0x00000006: .cfi_def_cfa_offset: 64
+// 0x00000006: movsd [rsp + 32], xmm13
+// 0x0000000d: .cfi_offset: r30 at cfa-32
+// 0x0000000d: movsd [rsp + 24], xmm12
+// 0x00000014: .cfi_offset: r29 at cfa-40
+// 0x00000014: mov [rsp], edi
+// 0x00000017: .cfi_remember_state
+// 0x00000017: movsd xmm12, [rsp + 24]
+// 0x0000001e: .cfi_restore: r29
+// 0x0000001e: movsd xmm13, [rsp + 32]
+// 0x00000025: .cfi_restore: r30
+// 0x00000025: addq rsp, 40
+// 0x00000029: .cfi_def_cfa_offset: 24
+// 0x00000029: pop rbx
+// 0x0000002a: .cfi_def_cfa_offset: 16
+// 0x0000002a: .cfi_restore: r3
+// 0x0000002a: pop rbp
+// 0x0000002b: .cfi_def_cfa_offset: 8
+// 0x0000002b: .cfi_restore: r6
+// 0x0000002b: ret
+// 0x0000002c: .cfi_restore_state
+// 0x0000002c: .cfi_def_cfa_offset: 64
+
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 12798edac5..a428c75c8c 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -50,6 +50,7 @@
#include "ssa_builder.h"
#include "ssa_phi_elimination.h"
#include "ssa_liveness_analysis.h"
+#include "utils/assembler.h"
#include "reference_type_propagation.h"
namespace art {
@@ -199,20 +200,6 @@ class OptimizingCompiler FINAL : public Compiler {
InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
}
- bool WriteElf(art::File* file,
- OatWriter* oat_writer,
- const std::vector<const art::DexFile*>& dex_files,
- const std::string& android_root,
- bool is_host) const OVERRIDE SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
- if (kProduce64BitELFFiles && Is64BitInstructionSet(GetCompilerDriver()->GetInstructionSet())) {
- return art::ElfWriterQuick64::Create(file, oat_writer, dex_files, android_root, is_host,
- *GetCompilerDriver());
- } else {
- return art::ElfWriterQuick32::Create(file, oat_writer, dex_files, android_root, is_host,
- *GetCompilerDriver());
- }
- }
-
void InitCompilationUnit(CompilationUnit& cu) const OVERRIDE;
void Init() OVERRIDE;
@@ -370,6 +357,9 @@ static ArrayRef<const uint8_t> AlignVectorSize(std::vector<uint8_t>& vector) {
return ArrayRef<const uint8_t>(vector);
}
+// TODO: The function below uses too much stack space.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
CodeGenerator* codegen,
@@ -395,12 +385,17 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
CodeVectorAllocator allocator;
codegen->CompileOptimized(&allocator);
+ DefaultSrcMap src_mapping_table;
+ if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) {
+ codegen->BuildSourceMap(&src_mapping_table);
+ }
+
std::vector<uint8_t> stack_map;
codegen->BuildStackMaps(&stack_map);
compilation_stats_.RecordStat(MethodCompilationStat::kCompiledOptimized);
- return CompiledMethod::SwapAllocCompiledMethodStackMap(
+ return CompiledMethod::SwapAllocCompiledMethod(
compiler_driver,
codegen->GetInstructionSet(),
ArrayRef<const uint8_t>(allocator.GetMemory()),
@@ -410,9 +405,15 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
codegen->GetCoreSpillMask(),
codegen->GetFpuSpillMask(),
- ArrayRef<const uint8_t>(stack_map));
+ &src_mapping_table,
+ ArrayRef<const uint8_t>(), // mapping_table.
+ ArrayRef<const uint8_t>(stack_map),
+ ArrayRef<const uint8_t>(), // native_gc_map.
+ ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
+ ArrayRef<const LinkerPatch>());
}
+#pragma GCC diagnostic pop
CompiledMethod* OptimizingCompiler::CompileBaseline(
CodeGenerator* codegen,
@@ -422,9 +423,11 @@ CompiledMethod* OptimizingCompiler::CompileBaseline(
codegen->CompileBaseline(&allocator);
std::vector<uint8_t> mapping_table;
+ codegen->BuildMappingTable(&mapping_table);
DefaultSrcMap src_mapping_table;
- bool include_debug_symbol = compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols();
- codegen->BuildMappingTable(&mapping_table, include_debug_symbol ? &src_mapping_table : nullptr);
+ if (compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols()) {
+ codegen->BuildSourceMap(&src_mapping_table);
+ }
std::vector<uint8_t> vmap_table;
codegen->BuildVMapTable(&vmap_table);
std::vector<uint8_t> gc_map;
@@ -445,7 +448,8 @@ CompiledMethod* OptimizingCompiler::CompileBaseline(
AlignVectorSize(mapping_table),
AlignVectorSize(vmap_table),
AlignVectorSize(gc_map),
- ArrayRef<const uint8_t>());
+ ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
+ ArrayRef<const LinkerPatch>());
}
CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
@@ -511,6 +515,8 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
compilation_stats_.RecordStat(MethodCompilationStat::kNotCompiledNoCodegen);
return nullptr;
}
+ codegen->GetAssembler()->cfi().SetEnabled(
+ compiler_driver->GetCompilerOptions().GetIncludeDebugSymbols());
PassInfoPrinter pass_info_printer(graph,
method_name.c_str(),
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index b97a66719d..4d5b8d0639 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -47,6 +47,7 @@ enum MethodCompilationStat {
kNotCompiledUnhandledInstruction,
kRemovedCheckedCast,
kRemovedNullCheck,
+ kInstructionSimplifications,
kLastStat
};
@@ -110,6 +111,7 @@ class OptimizingCompilerStats {
case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
case kRemovedCheckedCast: return "kRemovedCheckedCast";
case kRemovedNullCheck: return "kRemovedNullCheck";
+ case kInstructionSimplifications: return "kInstructionSimplifications";
default: LOG(FATAL) << "invalid stat";
}
return "";
diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc
index 9df8f5640d..4936685367 100644
--- a/compiler/optimizing/parallel_move_resolver.cc
+++ b/compiler/optimizing/parallel_move_resolver.cc
@@ -269,6 +269,20 @@ int ParallelMoveResolver::AllocateScratchRegister(int blocked,
}
+int ParallelMoveResolver::AllocateScratchRegister(int blocked,
+ int register_count) {
+ int scratch = -1;
+ for (int reg = 0; reg < register_count; ++reg) {
+ if ((blocked != reg) && IsScratchLocation(Location::RegisterLocation(reg))) {
+ scratch = reg;
+ break;
+ }
+ }
+
+ return scratch;
+}
+
+
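Aside (not part of the patch): the no-spill overload pairs with the
two-argument ScratchRegisterScope; callers must handle failure themselves, as
the x86-64 Exchange64(int, int) above does:

    ScratchRegisterScope ensure_scratch(
        this, TMP, codegen_->GetNumberOfCoreRegisters());
    if (ensure_scratch.GetRegister() == kNoRegister) {
      // No register free: fall back, e.g. swap through the stack.
    }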
ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
ParallelMoveResolver* resolver, int blocked, int if_scratch, int number_of_registers)
: resolver_(resolver),
@@ -282,6 +296,16 @@ ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
}
+ParallelMoveResolver::ScratchRegisterScope::ScratchRegisterScope(
+ ParallelMoveResolver* resolver, int blocked, int number_of_registers)
+ : resolver_(resolver),
+ reg_(kNoRegister),
+ spilled_(false) {
+ // We don't want to spill a register if none are free.
+ reg_ = resolver_->AllocateScratchRegister(blocked, number_of_registers);
+}
+
+
ParallelMoveResolver::ScratchRegisterScope::~ScratchRegisterScope() {
if (spilled_) {
resolver_->RestoreScratch(reg_);
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index 3fa1b37afd..173cffc71e 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -42,10 +42,15 @@ class ParallelMoveResolver : public ValueObject {
protected:
class ScratchRegisterScope : public ValueObject {
public:
+ // Spill a scratch register if no regs are free.
ScratchRegisterScope(ParallelMoveResolver* resolver,
int blocked,
int if_scratch,
int number_of_registers);
+ // Grab a scratch register only if available.
+ ScratchRegisterScope(ParallelMoveResolver* resolver,
+ int blocked,
+ int number_of_registers);
~ScratchRegisterScope();
int GetRegister() const { return reg_; }
@@ -62,6 +67,8 @@ class ParallelMoveResolver : public ValueObject {
// Allocate a scratch register for performing a move. The method will try to use
// a register that is the destination of a move, but that move has not been emitted yet.
int AllocateScratchRegister(int blocked, int if_scratch, int register_count, bool* spilled);
+  // As above, but return -1 if no register is free.
+ int AllocateScratchRegister(int blocked, int register_count);
// Emit a move.
virtual void EmitMove(size_t index) = 0;