Delay emitting CFI PC adjustments until after Thumb2/Mips fixup.

On Mips also take into account out-of-order CFI data emitted
from EmitBranches().

Change-Id: I03b0b0b4c2b1ea31a02699ef5fa1c55aa42c23c3
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index fb3aa1e..297cc54 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -282,6 +282,32 @@
   }
 }
 
+// Rebuilds the CFI stream so that every delayed advance-PC uses its adjusted code position.
+void Thumb2Assembler::PatchCFI() {
+  typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC;
+  if (cfi().NumberOfDelayedAdvancePCs() != 0u) {
+    // Take over the unpatched opcodes together with the pending advance records.
+    const auto released = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();
+    const std::vector<uint8_t>& unpatched = released.first;
+    const std::vector<DelayedAdvancePC>& pending = released.second;
+    // Pre-size the writer's buffer, then refill it with the patched opcodes.
+    cfi().ReserveCFIStream(unpatched.size() + pending.size() + 16);
+    size_t copied_up_to = 0;
+    for (size_t i = 0u; i != pending.size(); ++i) {
+      const DelayedAdvancePC& record = pending[i];
+      DCHECK_GE(record.stream_pos, copied_up_to);
+      // Opcodes issued before this advance are carried over unchanged ...
+      cfi().AppendRawData(unpatched, copied_up_to, record.stream_pos);
+      copied_up_to = record.stream_pos;
+      // ... and the advance itself follows, now with its final offset.
+      size_t final_pc = GetAdjustedPosition(record.pc);
+      cfi().AdvancePC(final_pc);
+    }
+    // Anything after the last advance is copied verbatim.
+    cfi().AppendRawData(unpatched, copied_up_to, unpatched.size());
+  }
+}
+
 inline int16_t Thumb2Assembler::BEncoding16(int32_t offset, Condition cond) {
   DCHECK_ALIGNED(offset, 2);
   int16_t encoding = B15 | B14;
@@ -463,6 +489,7 @@
   EmitLiterals();
   FinalizeTrackedLabels();
   EmitJumpTables();
+  PatchCFI();
 }
 
 bool Thumb2Assembler::ShifterOperandCanAlwaysHold(uint32_t immediate) {
diff --git a/compiler/utils/arm/assembler_thumb2.h b/compiler/utils/arm/assembler_thumb2.h
index 38fd244..e183613 100644
--- a/compiler/utils/arm/assembler_thumb2.h
+++ b/compiler/utils/arm/assembler_thumb2.h
@@ -44,6 +44,7 @@
         last_position_adjustment_(0u),
         last_old_position_(0u),
         last_fixup_id_(0u) {
+    cfi().DelayEmittingAdvancePCs();
   }
 
   virtual ~Thumb2Assembler() {
@@ -792,6 +793,7 @@
   void EmitFixups(uint32_t adjusted_code_size);
   void EmitLiterals();
   void EmitJumpTables();
+  void PatchCFI();
 
   static int16_t BEncoding16(int32_t offset, Condition cond);
   static int32_t BEncoding32(int32_t offset, Condition cond);
diff --git a/compiler/utils/assembler.cc b/compiler/utils/assembler.cc
index b01b0fe..f784d2c 100644
--- a/compiler/utils/assembler.cc
+++ b/compiler/utils/assembler.cc
@@ -38,6 +38,7 @@
 #ifdef ART_ENABLE_CODEGEN_x86_64
 #include "x86_64/assembler_x86_64.h"
 #endif
+#include "base/casts.h"
 #include "globals.h"
 #include "memory_region.h"
 
@@ -119,7 +120,13 @@
 }
 
 void DebugFrameOpCodeWriterForAssembler::ImplicitlyAdvancePC() {
-  this->AdvancePC(assembler_->CodeSize());
+  const uint32_t code_offset = dchecked_integral_cast<uint32_t>(assembler_->CodeSize());
+  if (!delay_emitting_advance_pc_) {
+    AdvancePC(code_offset);  // Not delaying: emit the advance opcode right away.
+    return;
+  }
+  const uint32_t opcode_offset = dchecked_integral_cast<uint32_t>(opcodes_.size());
+  delayed_advance_pcs_.push_back(DelayedAdvancePC {opcode_offset, code_offset});  // Defer.
 }
 
 Assembler* Assembler::Create(InstructionSet instruction_set,
diff --git a/compiler/utils/assembler.h b/compiler/utils/assembler.h
index dfe6bab..1dbc142 100644
--- a/compiler/utils/assembler.h
+++ b/compiler/utils/assembler.h
@@ -271,16 +271,71 @@
 class DebugFrameOpCodeWriterForAssembler FINAL
     : public dwarf::DebugFrameOpCodeWriter<> {
  public:
+  struct DelayedAdvancePC {  // One advance-PC whose emission has been postponed.
+    uint32_t stream_pos;  // Size of opcodes_ at the moment the advance was recorded.
+    uint32_t pc;  // Code size at that moment; replaceable through OverrideDelayedPC().
+  };
+
   // This method is called the by the opcode writers.
   virtual void ImplicitlyAdvancePC() FINAL;
 
   explicit DebugFrameOpCodeWriterForAssembler(Assembler* buffer)
-      : dwarf::DebugFrameOpCodeWriter<>(),
-        assembler_(buffer) {
+      : dwarf::DebugFrameOpCodeWriter<>(false /* enabled */),
+        assembler_(buffer),
+        delay_emitting_advance_pc_(false),  // Advances are emitted immediately by default.
+        delayed_advance_pcs_() {
+  }
+
+  ~DebugFrameOpCodeWriterForAssembler() {
+    DCHECK(delayed_advance_pcs_.empty());  // Recorded advances must have been released.
+  }
+
+  // From now on ImplicitlyAdvancePC() records advances instead of emitting them.
+  // The assembler must explicitly release and process all the delayed advances.
+  void DelayEmittingAdvancePCs() {
+    delay_emitting_advance_pc_ = true;
+  }
+
+  // Override the PC of the most recent delayed advance. The new PC can be out of order.
+  void OverrideDelayedPC(size_t pc) {
+    DCHECK(delay_emitting_advance_pc_);
+    DCHECK(!delayed_advance_pcs_.empty());
+    delayed_advance_pcs_.back().pc = pc;  // NOTE(review): unchecked size_t -> uint32_t.
+  }
+
+  // Return the number of advances recorded so far and not yet released.
+  size_t NumberOfDelayedAdvancePCs() const {
+    return delayed_advance_pcs_.size();
+  }
+
+  // Move out the CFI stream and the delayed advances for patching; also stops delaying.
+  std::pair<std::vector<uint8_t>, std::vector<DelayedAdvancePC>>
+  ReleaseStreamAndPrepareForDelayedAdvancePC() {
+    DCHECK(delay_emitting_advance_pc_);
+    delay_emitting_advance_pc_ = false;  // Later advances are emitted directly again.
+    std::pair<std::vector<uint8_t>, std::vector<DelayedAdvancePC>> result;
+    result.first.swap(opcodes_);  // opcodes_ is left empty.
+    result.second.swap(delayed_advance_pcs_);  // delayed_advance_pcs_ is left empty.
+    return result;
+  }
+
+  // Reserve capacity in the CFI stream (does not change its size).
+  void ReserveCFIStream(size_t capacity) {
+    opcodes_.reserve(capacity);
+  }
+
+  // Append the bytes raw_data[first, last) to the end of the CFI stream.
+  void AppendRawData(const std::vector<uint8_t>& raw_data, size_t first, size_t last) {
+    DCHECK_LE(0u, first);
+    DCHECK_LE(first, last);
+    DCHECK_LE(last, raw_data.size());
+    opcodes_.insert(opcodes_.end(), raw_data.begin() + first, raw_data.begin() + last);
   }
 
  private:
   Assembler* assembler_;
+  bool delay_emitting_advance_pc_;  // When true, ImplicitlyAdvancePC() only records advances.
+  std::vector<DelayedAdvancePC> delayed_advance_pcs_;  // Recorded but not yet emitted advances.
 };
 
 class Assembler {
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 6f35e9e..aee6412 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -43,8 +43,60 @@
 }
 
 void MipsAssembler::FinalizeInstructions(const MemoryRegion& region) {
+  size_t number_of_delayed_adjust_pcs = cfi().NumberOfDelayedAdvancePCs();
   EmitBranches();
   Assembler::FinalizeInstructions(region);
+  PatchCFI(number_of_delayed_adjust_pcs);  // Count was taken before EmitBranches() ran.
+}
+
+void MipsAssembler::PatchCFI(size_t number_of_delayed_adjust_pcs) {
+  if (cfi().NumberOfDelayedAdvancePCs() == 0u) {
+    DCHECK_EQ(number_of_delayed_adjust_pcs, 0u);
+    return;  // No advance was delayed, so there is nothing to patch.
+  }
+
+  typedef DebugFrameOpCodeWriterForAssembler::DelayedAdvancePC DelayedAdvancePC;
+  const auto data = cfi().ReleaseStreamAndPrepareForDelayedAdvancePC();
+  const std::vector<uint8_t>& old_stream = data.first;
+  const std::vector<DelayedAdvancePC>& advances = data.second;
+
+  // Rebuild the stream by merging two ranges of advances in increasing PC order: entries
+  // before number_of_delayed_adjust_pcs (recorded before EmitBranches(), still to be adjusted)
+  // and the later ones (already adjusted). Each range is sorted; they may overlap.
+  if (kIsDebugBuild) {
+    auto cmp = [](const DelayedAdvancePC& lhs, const DelayedAdvancePC& rhs) {
+      return lhs.pc < rhs.pc;
+    };
+    CHECK(std::is_sorted(advances.begin(), advances.begin() + number_of_delayed_adjust_pcs, cmp));
+    CHECK(std::is_sorted(advances.begin() + number_of_delayed_adjust_pcs, advances.end(), cmp));
+  }
+
+  // Copy the CFI data that precedes the first recorded advance, if any.
+  size_t size = advances.size();
+  DCHECK_NE(size, 0u);
+  cfi().AppendRawData(old_stream, 0u, advances[0].stream_pos);
+  // Merge the two ranges: emit the advance with the smaller PC, then the old data after it.
+  size_t adjust_pos = 0u;  // Next entry of the to-be-adjusted range.
+  size_t late_emit_pos = number_of_delayed_adjust_pcs;  // Next entry of the late range.
+  while (adjust_pos != number_of_delayed_adjust_pcs || late_emit_pos != size) {
+    size_t adjusted_pc = (adjust_pos != number_of_delayed_adjust_pcs)
+        ? GetAdjustedPosition(advances[adjust_pos].pc)
+        : static_cast<size_t>(-1);  // Sentinel: this range is exhausted.
+    size_t late_emit_pc = (late_emit_pos != size)
+        ? advances[late_emit_pos].pc
+        : static_cast<size_t>(-1);  // Sentinel: this range is exhausted.
+    size_t advance_pc = std::min(adjusted_pc, late_emit_pc);
+    DCHECK_NE(advance_pc, static_cast<size_t>(-1));
+    size_t entry = (adjusted_pc <= late_emit_pc) ? adjust_pos : late_emit_pos;
+    if (adjusted_pc <= late_emit_pc) {  // On a tie the to-be-adjusted range goes first.
+      ++adjust_pos;
+    } else {
+      ++late_emit_pos;
+    }
+    cfi().AdvancePC(advance_pc);
+    size_t end_pos = (entry + 1u == size) ? old_stream.size() : advances[entry + 1u].stream_pos;
+    cfi().AppendRawData(old_stream, advances[entry].stream_pos, end_pos);  // Data after it.
+  }
 }
 
 void MipsAssembler::EmitBranches() {
@@ -1770,6 +1822,7 @@
                                const std::vector<ManagedRegister>& callee_save_regs,
                                const ManagedRegisterEntrySpills& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK(!overwriting_);
 
   // Increase frame to required size.
   IncreaseFrameSize(frame_size);
@@ -1811,6 +1864,7 @@
 void MipsAssembler::RemoveFrame(size_t frame_size,
                                 const std::vector<ManagedRegister>& callee_save_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
+  DCHECK(!overwriting_);
   cfi_.RememberState();
 
   // Pop callee saves and return address.
@@ -1840,12 +1894,18 @@
   CHECK_ALIGNED(adjust, kFramePointerSize);
   Addiu32(SP, SP, -adjust);
   cfi_.AdjustCFAOffset(adjust);
+  if (overwriting_) {
+    cfi_.OverrideDelayedPC(overwrite_location_);
+  }
 }
 
 void MipsAssembler::DecreaseFrameSize(size_t adjust) {
   CHECK_ALIGNED(adjust, kFramePointerSize);
   Addiu32(SP, SP, adjust);
   cfi_.AdjustCFAOffset(-adjust);
+  if (overwriting_) {  // NOTE(review): presumably set while existing code is rewritten.
+    cfi_.OverrideDelayedPC(overwrite_location_);  // Sets the last delayed advance's pc.
+  }
 }
 
 void MipsAssembler::Store(FrameOffset dest, ManagedRegister msrc, size_t size) {
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index aa187b8..4038c1f 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -94,7 +94,9 @@
         last_position_adjustment_(0),
         last_old_position_(0),
         last_branch_id_(0),
-        isa_features_(instruction_set_features) {}
+        isa_features_(instruction_set_features) {
+    cfi().DelayEmittingAdvancePCs();
+  }
 
   virtual ~MipsAssembler() {
     for (auto& branch : branches_) {
@@ -599,6 +601,7 @@
   void PromoteBranches();
   void EmitBranch(Branch* branch);
   void EmitBranches();
+  void PatchCFI(size_t number_of_delayed_adjust_pcs);
 
   // Emits exception block.
   void EmitExceptionPoll(MipsExceptionSlowPath* exception);