Merge "Fix a few comments in vectorization code that were incorrect or incomplete."
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 80776e8..08a752f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -186,10 +186,10 @@
     LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);  // only saves full width XMM for SIMD
+    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
     x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, locations);  // only saves full width XMM for SIMD
+    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 49f099f..ff6e099 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -143,10 +143,10 @@
     LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);  // only saves full width XMM for SIMD
+    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
-    RestoreLiveRegisters(codegen, locations);  // only saves full width XMM for SIMD
+    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index ca31bf8..1a79601 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -544,12 +544,13 @@
     bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
     DCHECK(vectorized_def);
   }
-  // Generate body.
+  // Generate body from the instruction map, but in original program order.
   HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment();
   for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
     auto i = vector_map_->find(it.Current());
     if (i != vector_map_->end() && !i->second->IsInBlock()) {
-      Insert(vector_body_, i->second);  // lays out in original order
+      Insert(vector_body_, i->second);
+      // Deal with instructions that need an environment, such as the scalar intrinsics.
       if (i->second->NeedsEnvironment()) {
         i->second->CopyEnvironmentFromWithLoopPhiAdjustment(env, vector_header_);
       }
@@ -991,8 +992,9 @@
             UNREACHABLE();
         }  // switch invoke
       } else {
-        // In scalar code, simply clone the method invoke, and replace its operands
-        // with the corresponding new scalar instructions in the loop.
+        // In scalar code, simply clone the method invoke, and replace its operands with the
+        // corresponding new scalar instructions in the loop. The instruction will get an
+        // environment while being inserted from the instruction map in original program order.
         DCHECK(vector_mode_ == kSequential);
         HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect(
             global_allocator_,