Improve string splitting

String splitting is something that we often have to do, but the existing
Split() helper only accepts a std::string and always fills a
std::vector<std::string>. Add support for using std::string_view in many
circumstances and add SplitString(), an iterator over the pieces that
does not allocate.

Test: ./test.py --host
Change-Id: I1b56b7e10926a064b64011326b508dd4af707df9
diff --git a/libartbase/base/stl_util.h b/libartbase/base/stl_util.h
index 4d4a686..dfe994e 100644
--- a/libartbase/base/stl_util.h
+++ b/libartbase/base/stl_util.h
@@ -323,6 +323,60 @@
   return SafePrinter<Val>{v};
 }
 
+// Allocation-free forward iterator over the pieces of a string_view split on one character.
+// The member types are spelled out because std::iterator is deprecated since C++17.
+struct SplitStringIter {
+ public:
+  using iterator_category = std::forward_iterator_tag;
+  using value_type = std::string_view;
+  using difference_type = std::ptrdiff_t;
+  using pointer = std::string_view*;
+  using reference = std::string_view&;
+
+  // 'index' is where the current piece starts in 'sv'; npos marks the end iterator.
+  SplitStringIter(size_t index, char split, std::string_view sv)
+      : cur_index_(index), split_on_(split), sv_(sv) {}
+
+  SplitStringIter& operator++() {
+    size_t nxt = sv_.find(split_on_, cur_index_);
+    cur_index_ = (nxt == std::string_view::npos) ? nxt : nxt + 1;
+    return *this;
+  }
+
+  SplitStringIter operator++(int) {
+    SplitStringIter ret(*this);
+    ++(*this);
+    return ret;
+  }
+
+  // The index is compared first so that a loop's 'it != end' test usually fails fast
+  // instead of comparing the characters of the two views on every step.
+  bool operator==(const SplitStringIter& other) const {
+    return cur_index_ == other.cur_index_ && split_on_ == other.split_on_ && sv_ == other.sv_;
+  }
+
+  bool operator!=(const SplitStringIter& other) const {
+    return !(*this == other);
+  }
+
+  // Returns the current piece, which may be empty. Must not be used on the end iterator.
+  std::string_view operator*() const {
+    return sv_.substr(cur_index_, sv_.substr(cur_index_).find(split_on_));
+  }
+
+ private:
+  size_t cur_index_;
+  char split_on_;
+  std::string_view sv_;
+};
+
+// Lazily splits 'sv' at every 'target'; empty pieces are kept and nothing is allocated.
+// Eg: SplitString(":foo::bar", ':') -> ["", "foo", "", "bar"]
+inline IterationRange<SplitStringIter> SplitString(std::string_view sv, char target) {
+  constexpr size_t kEnd = std::string_view::npos;  // Index carried by the end iterator.
+  return MakeIterationRange(SplitStringIter(0, target, sv), SplitStringIter(kEnd, target, sv));
+}
+
 }  // namespace art
 
 #endif  // ART_LIBARTBASE_BASE_STL_UTIL_H_
diff --git a/libartbase/base/utils.cc b/libartbase/base/utils.cc
index 492e737..ba62f30 100644
--- a/libartbase/base/utils.cc
+++ b/libartbase/base/utils.cc
@@ -25,11 +25,13 @@
 
 #include <fstream>
 #include <memory>
+#include <string>
 
 #include "android-base/file.h"
 #include "android-base/stringprintf.h"
 #include "android-base/strings.h"
 
+#include "base/stl_util.h"
 #include "bit_utils.h"
 #include "os.h"
 
@@ -230,22 +232,45 @@
                       byte_count / kBytesPerUnit[i], kUnitStrings[i]);
 }
 
-void Split(const std::string& s, char separator, std::vector<std::string>* result) {
-  const char* p = s.data();
-  const char* end = p + s.size();
-  while (p != end) {
-    if (*p == separator) {
-      ++p;
-    } else {
-      const char* start = p;
-      while (++p != end && *p != separator) {
-        // Skip to the next occurrence of the separator.
-      }
-      result->push_back(std::string(start, p - start));
+template <typename StrIn, typename Str>  // A Str is built from each non-empty piece of 's'.
+void Split(const StrIn& s, char separator, std::vector<Str>* out_result) {
+  auto split = SplitString(std::string_view(s), separator);  // Lazy range over every piece.
+  for (std::string_view p : split) {
+    if (p.empty()) {  // Leading, trailing and doubled separators produce empty pieces.
+      continue;
     }
+    out_result->push_back(Str(p));  // Appended after whatever out_result already holds.
   }
 }
 
+// Explicit instantiations: utils.h only declares this template, so other files get these.
+template void Split(const char* const& s, char separator, std::vector<std::string>* out_result);
+template void Split(const std::string& s, char separator, std::vector<std::string>* out_result);
+template void Split(const char* const& s, char separator, std::vector<std::string_view>* out_result);
+template void Split(const std::string_view& s, char separator,
+                    std::vector<std::string_view>* out_result);
+
+// Stores the non-empty pieces of 's' split on 'separator' into out_result[0..len).
+// Pieces that do not fit are dropped; slots that are not needed are left untouched.
+template <typename Str>
+void Split(const Str& s, char separator, size_t len, Str* out_result) {
+  Str* const limit = out_result + len;
+  for (std::string_view piece : SplitString(std::string_view(s), separator)) {
+    if (!piece.empty()) {
+      if (out_result == limit) {
+        return;
+      }
+      *out_result++ = Str(piece);
+    }
+  }
+}
+
+// The fixed-size variant is instantiated only for buffers of std::string and of
+// std::string_view.
+template void Split(const std::string& s, char separator, size_t len, std::string* out_result);
+template void Split(const std::string_view& s, char separator, size_t len,
+                    std::string_view* out_result);
+
 void SetThreadName(const char* thread_name) {
   bool hasAt = false;
   bool hasDot = false;
diff --git a/libartbase/base/utils.h b/libartbase/base/utils.h
index 66a5699..7160302 100644
--- a/libartbase/base/utils.h
+++ b/libartbase/base/utils.h
@@ -44,7 +44,16 @@
 
 // Splits a string using the given separator character into a vector of
 // strings. Empty strings will be omitted.
-void Split(const std::string& s, char separator, std::vector<std::string>* result);
+template<typename StrIn, typename Str>
+void Split(const StrIn& s, char separator, std::vector<Str>* out_result);
+
+template<typename Str>
+void Split(const Str& s, char separator, size_t len, Str* out_result);
+
+template<typename StrIn, typename Str, size_t kLen>  // Keeps at most kLen non-empty pieces.
+void Split(const StrIn& s, char separator, std::array<Str, kLen>* out_result) {
+  Split<Str>(Str(s), separator, kLen, out_result->data());  // data() is valid even if kLen == 0.
+}
 
 // Returns the calling thread's tid. (The C libraries don't expose this.)
 uint32_t GetTid();
diff --git a/libartbase/base/utils_test.cc b/libartbase/base/utils_test.cc
index f67ada2..09705fe 100644
--- a/libartbase/base/utils_test.cc
+++ b/libartbase/base/utils_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "utils.h"
+#include "stl_util.h"
 
 #include "gtest/gtest.h"
 
@@ -41,9 +42,13 @@
   EXPECT_EQ("512B", PrettySize(512));
 }
 
+void Split(const char* arr, char s, std::vector<std::string_view>* sv) {
+  Split<std::string_view, std::string_view>(arr, s, sv);  // 'arr' binds as a temporary view.
+}
+
 TEST_F(UtilsTest, Split) {
-  std::vector<std::string> actual;
-  std::vector<std::string> expected;
+  std::vector<std::string_view> actual;
+  std::vector<std::string_view> expected;
 
   expected.clear();
 
@@ -115,4 +120,48 @@
   EXPECT_EQ("<unknown>", GetProcessStatus("InvalidFieldName"));
 }
 
+TEST_F(UtilsTest, StringSplit) {  // Separators at both ends and doubled in the middle.
+  auto range = SplitString("[ab[c[[d[e[", '[');
+  auto it = range.begin();
+  EXPECT_FALSE(it == range.end());
+  EXPECT_EQ(*it++, "");  // Piece before the leading '['.
+  EXPECT_FALSE(it == range.end());
+  EXPECT_EQ(*it++, "ab");
+  EXPECT_FALSE(it == range.end());
+  EXPECT_EQ(*it++, "c");
+  EXPECT_FALSE(it == range.end());
+  EXPECT_EQ(*it++, "");  // Piece between the doubled "[[".
+  EXPECT_FALSE(it == range.end());
+  EXPECT_EQ(*it++, "d");
+  EXPECT_FALSE(it == range.end());
+  EXPECT_EQ(*it++, "e");
+  EXPECT_FALSE(it == range.end());
+  EXPECT_EQ(*it++, "");  // Piece after the trailing '['.
+  EXPECT_TRUE(it == range.end());
+}
+
+TEST_F(UtilsTest, StringSplit2) {  // No separator at either end of the input.
+  auto range = SplitString("ab[c[[d[e", '[');
+  auto it = range.begin();
+  EXPECT_FALSE(it == range.end());
+  EXPECT_EQ(*it++, "ab");
+  EXPECT_FALSE(it == range.end());
+  EXPECT_EQ(*it++, "c");
+  EXPECT_FALSE(it == range.end());
+  EXPECT_EQ(*it++, "");  // Piece between the doubled "[[".
+  EXPECT_FALSE(it == range.end());
+  EXPECT_EQ(*it++, "d");
+  EXPECT_FALSE(it == range.end());
+  EXPECT_EQ(*it++, "e");
+  EXPECT_TRUE(it == range.end());
+}
+
+TEST_F(UtilsTest, StringSplit3) {  // Empty input still yields exactly one empty piece.
+  auto range = SplitString("", '[');
+  auto it = range.begin();
+  EXPECT_FALSE(it == range.end());
+  EXPECT_EQ(*it++, "");
+  EXPECT_TRUE(it == range.end());
+}
+
 }  // namespace art