Improve string splitting
String splitting is something we do often, but the existing Split helper
only accepts a std::string and copies every piece into a new std::string.
Add std::string_view support to Split in many circumstances, and add
SplitString, which iterates over the pieces without allocation.
Test: ./test.py --host
Change-Id: I1b56b7e10926a064b64011326b508dd4af707df9
diff --git a/libartbase/base/stl_util.h b/libartbase/base/stl_util.h
index 4d4a686..dfe994e 100644
--- a/libartbase/base/stl_util.h
+++ b/libartbase/base/stl_util.h
@@ -323,6 +323,60 @@
return SafePrinter<Val>{v};
}
+// Forward iterator over the string_view pieces of a string split on one character; no allocation.
+struct SplitStringIter : public std::iterator<std::forward_iterator_tag, std::string_view> {
+ public:
+ // Direct iterator constructor. The iteration state is only the current index.
+ // We use that with the split char and the full string to get the current and
+ // next segment.
+ SplitStringIter(size_t index, char split, std::string_view sv)
+ : cur_index_(index), split_on_(split), sv_(sv) {}
+ SplitStringIter(const SplitStringIter&) = default;
+ SplitStringIter(SplitStringIter&&) = default;
+ SplitStringIter& operator=(SplitStringIter&&) = default;
+ SplitStringIter& operator=(const SplitStringIter&) = default;
+
+ SplitStringIter& operator++() {
+ size_t nxt = sv_.find(split_on_, cur_index_);
+ if (nxt == std::string_view::npos) {
+ cur_index_ = std::string_view::npos;
+ } else {
+ cur_index_ = nxt + 1;
+ }
+ return *this;
+ }
+
+ SplitStringIter operator++(int) {
+ SplitStringIter ret(cur_index_, split_on_, sv_);
+ ++(*this);
+ return ret;
+ }
+
+ bool operator==(const SplitStringIter& other) const {
+ return sv_ == other.sv_ && split_on_ == other.split_on_ && cur_index_== other.cur_index_;
+ }
+
+ bool operator!=(const SplitStringIter& other) const {
+ return !(*this == other);
+ }
+
+ typename std::string_view operator*() const {
+ return sv_.substr(cur_index_, sv_.substr(cur_index_).find(split_on_));
+ }
+
+ private:
+ size_t cur_index_;
+ char split_on_;
+ std::string_view sv_;
+};
+
+// Create an iteration range over the string 'sv' split at each 'target' occurrence.
+// E.g.: SplitString(":foo::bar", ':') -> ["", "foo", "", "bar"]
+inline IterationRange<SplitStringIter> SplitString(std::string_view sv, char target) {
+ return MakeIterationRange(SplitStringIter(0, target, sv),
+ SplitStringIter(std::string_view::npos, target, sv));
+}
+
} // namespace art
#endif // ART_LIBARTBASE_BASE_STL_UTIL_H_
diff --git a/libartbase/base/utils.cc b/libartbase/base/utils.cc
index 492e737..ba62f30 100644
--- a/libartbase/base/utils.cc
+++ b/libartbase/base/utils.cc
@@ -25,11 +25,13 @@
#include <fstream>
#include <memory>
+#include <string>
#include "android-base/file.h"
#include "android-base/stringprintf.h"
#include "android-base/strings.h"
+#include "base/stl_util.h"
#include "bit_utils.h"
#include "os.h"
@@ -230,22 +232,45 @@
byte_count / kBytesPerUnit[i], kUnitStrings[i]);
}
-void Split(const std::string& s, char separator, std::vector<std::string>* result) {
- const char* p = s.data();
- const char* end = p + s.size();
- while (p != end) {
- if (*p == separator) {
- ++p;
- } else {
- const char* start = p;
- while (++p != end && *p != separator) {
- // Skip to the next occurrence of the separator.
- }
- result->push_back(std::string(start, p - start));
+template <typename StrIn, typename Str>
+void Split(const StrIn& s, char separator, std::vector<Str>* out_result) {
+ auto split = SplitString(std::string_view(s), separator);
+ for (std::string_view p : split) {
+ if (p.empty()) {
+ continue;
}
+ out_result->push_back(Str(p));
}
}
+template void Split(const char *const& s, char separator, std::vector<std::string>* out_result);
+template void Split(const std::string& s, char separator, std::vector<std::string>* out_result);
+template void Split(const char *const& s, char separator, std::vector<std::string_view>* out_result);
+template void Split(const std::string_view& s,
+ char separator,
+ std::vector<std::string_view>* out_result);
+
+template <typename Str>
+void Split(const Str& s, char separator, size_t len, Str* out_result) {
+ Str* last = out_result + len;
+ auto split = SplitString(std::string_view(s), separator);
+ for (std::string_view p : split) {
+ if (p.empty()) {
+ continue;
+ }
+ if (out_result == last) {
+ return;
+ }
+ *out_result++ = Str(p);
+ }
+}
+
+template void Split(const std::string& s, char separator, size_t len, std::string* out_result);
+template void Split(const std::string_view& s,
+ char separator,
+ size_t len,
+ std::string_view* out_result);
+
void SetThreadName(const char* thread_name) {
bool hasAt = false;
bool hasDot = false;
diff --git a/libartbase/base/utils.h b/libartbase/base/utils.h
index 66a5699..7160302 100644
--- a/libartbase/base/utils.h
+++ b/libartbase/base/utils.h
@@ -44,7 +44,16 @@
// Splits a string using the given separator character into a vector of
// strings. Empty strings will be omitted.
-void Split(const std::string& s, char separator, std::vector<std::string>* result);
+template<typename StrIn, typename Str>
+void Split(const StrIn& s, char separator, std::vector<Str>* out_result);
+
+template<typename Str>
+void Split(const Str& s, char separator, size_t len, Str* out_result);
+
+template<typename StrIn, typename Str, size_t kLen>
+void Split(const StrIn& s, char separator, std::array<Str, kLen>* out_result) {
+ Split<Str>(Str(s), separator, kLen, &((*out_result)[0]));
+}
// Returns the calling thread's tid. (The C libraries don't expose this.)
uint32_t GetTid();
diff --git a/libartbase/base/utils_test.cc b/libartbase/base/utils_test.cc
index f67ada2..09705fe 100644
--- a/libartbase/base/utils_test.cc
+++ b/libartbase/base/utils_test.cc
@@ -15,6 +15,7 @@
*/
#include "utils.h"
+#include "stl_util.h"
#include "gtest/gtest.h"
@@ -41,9 +42,13 @@
EXPECT_EQ("512B", PrettySize(512));
}
+void Split(const char* arr, char s, std::vector<std::string_view>* sv) {
+ Split<std::string_view>(std::string_view(arr), s, sv);
+}
+
TEST_F(UtilsTest, Split) {
- std::vector<std::string> actual;
- std::vector<std::string> expected;
+ std::vector<std::string_view> actual;
+ std::vector<std::string_view> expected;
expected.clear();
@@ -115,4 +120,48 @@
EXPECT_EQ("<unknown>", GetProcessStatus("InvalidFieldName"));
}
+TEST_F(UtilsTest, StringSplit) {
+ auto range = SplitString("[ab[c[[d[e[", '[');
+ auto it = range.begin();
+ EXPECT_FALSE(it == range.end());
+ EXPECT_EQ(*it++, "");
+ EXPECT_FALSE(it == range.end());
+ EXPECT_EQ(*it++, "ab");
+ EXPECT_FALSE(it == range.end());
+ EXPECT_EQ(*it++, "c");
+ EXPECT_FALSE(it == range.end());
+ EXPECT_EQ(*it++, "");
+ EXPECT_FALSE(it == range.end());
+ EXPECT_EQ(*it++, "d");
+ EXPECT_FALSE(it == range.end());
+ EXPECT_EQ(*it++, "e");
+ EXPECT_FALSE(it == range.end());
+ EXPECT_EQ(*it++, "");
+ EXPECT_TRUE(it == range.end());
+}
+
+TEST_F(UtilsTest, StringSplit2) {
+ auto range = SplitString("ab[c[[d[e", '[');
+ auto it = range.begin();
+ EXPECT_FALSE(it == range.end());
+ EXPECT_EQ(*it++, "ab");
+ EXPECT_FALSE(it == range.end());
+ EXPECT_EQ(*it++, "c");
+ EXPECT_FALSE(it == range.end());
+ EXPECT_EQ(*it++, "");
+ EXPECT_FALSE(it == range.end());
+ EXPECT_EQ(*it++, "d");
+ EXPECT_FALSE(it == range.end());
+ EXPECT_EQ(*it++, "e");
+ EXPECT_TRUE(it == range.end());
+}
+
+TEST_F(UtilsTest, StringSplit3) {
+ auto range = SplitString("", '[');
+ auto it = range.begin();
+ EXPECT_FALSE(it == range.end());
+ EXPECT_EQ(*it++, "");
+ EXPECT_TRUE(it == range.end());
+}
+
} // namespace art