Implement register allocation for floating-point registers.

Also:
- Fix incorrect emission of the REX prefix in the x86_64 assembler.
- Fix movaps code generation in the x86_64 assembler.

Change-Id: Ib6dcf6e7c4a9c43368cfc46b02ba50f69ae69cbe
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 5ac0189..e04a8d8 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -80,8 +80,10 @@
 
  private:
   void Exchange32(CpuRegister reg, int mem);
+  void Exchange32(XmmRegister reg, int mem);
   void Exchange32(int mem1, int mem2);
   void Exchange64(CpuRegister reg, int mem);
+  void Exchange64(XmmRegister reg, int mem);
   void Exchange64(int mem1, int mem2);
 
   CodeGeneratorX86_64* const codegen_;
@@ -146,8 +148,10 @@
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(HBasicBlock* block) OVERRIDE;
   virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
-  virtual void SaveCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
-  virtual void RestoreCoreRegister(Location stack_location, uint32_t reg_id) OVERRIDE;
+  virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+  virtual size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+  virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+  virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
 
   virtual size_t GetWordSize() const OVERRIDE {
     return kX86_64WordSize;