summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alex Light <allight@google.com> 2021-02-08 17:46:15 -0800
committer Alex Light <allight@google.com> 2021-02-10 01:39:40 +0000
commit60117aeeffda3d01a5314984694ae3d6d4588fc1 (patch)
tree12f5062c456de8b50418a8fb74c801c8ccc5b690
parentc7ac91b21d1a15c14e29d69ff02b48c485962b0d (diff)
Improve string splitting
String splitting is something that we often have to do but our support code for doing so is not the best. Add support for using std::string_view in many circumstances and add support for making an iterator of splits without allocation. Test: ./test.py --host Change-Id: I1b56b7e10926a064b64011326b508dd4af707df9
-rw-r--r--libartbase/base/stl_util.h54
-rw-r--r--libartbase/base/utils.cc49
-rw-r--r--libartbase/base/utils.h11
-rw-r--r--libartbase/base/utils_test.cc53
4 files changed, 152 insertions, 15 deletions
diff --git a/libartbase/base/stl_util.h b/libartbase/base/stl_util.h
index 4d4a686620..dfe994ed09 100644
--- a/libartbase/base/stl_util.h
+++ b/libartbase/base/stl_util.h
@@ -323,6 +323,60 @@ SafePrinter<Val> SafePrint(const Val* v) {
return SafePrinter<Val>{v};
}
+// Forward iterator over the pieces of a string split on one character, without allocation.
+// Leading, trailing and adjacent separators each produce an empty piece. The iterator only
+// holds a view, so the characters behind 'sv' must stay alive while it (or any piece
+// obtained from it) is in use.
+struct SplitStringIter {
+ public:
+  // Iterator traits, written out by hand because std::iterator is deprecated since C++17.
+  // operator* builds each piece on the fly and returns it by value, so 'reference' is the
+  // value type rather than an lvalue reference.
+  using iterator_category = std::forward_iterator_tag;
+  using value_type = std::string_view;
+  using difference_type = std::string_view::difference_type;
+  using pointer = const std::string_view*;
+  using reference = std::string_view;
+
+  // Direct iterator constructor. The iteration state is only the current index: the offset
+  // in 'sv' where the current piece starts, or std::string_view::npos for the end iterator.
+  // Together with the split char and the full string it determines the current and next
+  // piece.
+  SplitStringIter(size_t index, char split, std::string_view sv)
+      : cur_index_(index), split_on_(split), sv_(sv) {}
+  SplitStringIter(const SplitStringIter&) = default;
+  SplitStringIter(SplitStringIter&&) = default;
+  SplitStringIter& operator=(SplitStringIter&&) = default;
+  SplitStringIter& operator=(const SplitStringIter&) = default;
+
+  // Moves to the piece that starts right after the next separator, or to the end state
+  // when no separator is left at or after the current index.
+  SplitStringIter& operator++() {
+    const size_t nxt = sv_.find(split_on_, cur_index_);
+    cur_index_ = (nxt == std::string_view::npos) ? std::string_view::npos : nxt + 1;
+    return *this;
+  }
+
+  // Post-increment: advances and returns the iterator as it was before advancing.
+  SplitStringIter operator++(int) {
+    SplitStringIter ret(*this);
+    ++(*this);
+    return ret;
+  }
+
+  // Two iterators are equal when they are at the same index of equal text split on the
+  // same character. The index is compared first: it differs on every loop test but the
+  // last, which lets the character-by-character comparison of the views be skipped.
+  bool operator==(const SplitStringIter& other) const {
+    return cur_index_ == other.cur_index_ && split_on_ == other.split_on_ && sv_ == other.sv_;
+  }
+
+  bool operator!=(const SplitStringIter& other) const {
+    return !(*this == other);
+  }
+
+  // Returns the current piece: the characters from the current index up to (not including)
+  // the next separator, or up to the end of the text if there is none. Must not be called
+  // on the end iterator.
+  std::string_view operator*() const {
+    const size_t end = sv_.find(split_on_, cur_index_);
+    const size_t len = (end == std::string_view::npos) ? std::string_view::npos : end - cur_index_;
+    return sv_.substr(cur_index_, len);
+  }
+
+ private:
+  size_t cur_index_;
+  char split_on_;
+  std::string_view sv_;
+};
+
+// Returns an iteration range over the pieces of 'sv' found between occurrences of 'target'.
+// Empty pieces are kept, e.g. SplitString(":foo::bar", ':') -> ["", "foo", "", "bar"].
+// The iterators only hold a view of 'sv'; the characters are not copied.
+inline IterationRange<SplitStringIter> SplitString(std::string_view sv, char target) {
+  // The end iterator is marked by an index of npos.
+  constexpr size_t kEndIndex = std::string_view::npos;
+  return MakeIterationRange(SplitStringIter(0, target, sv),
+                            SplitStringIter(kEndIndex, target, sv));
+}
+
} // namespace art
#endif // ART_LIBARTBASE_BASE_STL_UTIL_H_
diff --git a/libartbase/base/utils.cc b/libartbase/base/utils.cc
index 492e737f4e..ba62f30bdc 100644
--- a/libartbase/base/utils.cc
+++ b/libartbase/base/utils.cc
@@ -25,11 +25,13 @@
#include <fstream>
#include <memory>
+#include <string>
#include "android-base/file.h"
#include "android-base/stringprintf.h"
#include "android-base/strings.h"
+#include "base/stl_util.h"
#include "bit_utils.h"
#include "os.h"
@@ -230,22 +232,45 @@ std::string PrettySize(uint64_t byte_count) {
byte_count / kBytesPerUnit[i], kUnitStrings[i]);
}
-void Split(const std::string& s, char separator, std::vector<std::string>* result) {
- const char* p = s.data();
- const char* end = p + s.size();
- while (p != end) {
- if (*p == separator) {
- ++p;
- } else {
- const char* start = p;
- while (++p != end && *p != separator) {
- // Skip to the next occurrence of the separator.
- }
- result->push_back(std::string(start, p - start));
+template <typename StrIn, typename Str>
+void Split(const StrIn& s, char separator, std::vector<Str>* out_result) {
+  // Walk every piece of 's' and append only the non-empty ones, converted to 'Str'.
+  for (std::string_view piece : SplitString(std::string_view(s), separator)) {
+    if (!piece.empty()) {
+      out_result->push_back(Str(piece));
}
}
}
+
+// Explicit instantiations for the supported input/output type pairs; the template itself
+// is only declared in the header.
+template void Split(const char* const& s, char separator, std::vector<std::string>* out_result);
+template void Split(const std::string& s, char separator, std::vector<std::string>* out_result);
+template void Split(const char* const& s,
+                    char separator,
+                    std::vector<std::string_view>* out_result);
+template void Split(const std::string_view& s,
+                    char separator,
+                    std::vector<std::string_view>* out_result);
+
+// Stores the non-empty pieces of 's', in order, into out_result[0 .. len). Pieces that do
+// not fit are dropped; slots that are not needed are left as they were.
+template <typename Str>
+void Split(const Str& s, char separator, size_t len, Str* out_result) {
+  // Number of output slots filled so far.
+  size_t filled = 0;
+  for (std::string_view piece : SplitString(std::string_view(s), separator)) {
+    if (piece.empty()) {
+      continue;
+    }
+    // Every slot is taken: drop this piece and all remaining ones.
+    if (filled == len) {
+      return;
+    }
+    out_result[filled] = Str(piece);
+    ++filled;
+  }
+}
+
+// Explicit instantiations for the supported string types.
+template void Split(const std::string& s, char separator, size_t len, std::string* out_result);
+template void Split(const std::string_view& s,
+                    char separator,
+                    size_t len,
+                    std::string_view* out_result);
+
void SetThreadName(const char* thread_name) {
bool hasAt = false;
bool hasDot = false;
diff --git a/libartbase/base/utils.h b/libartbase/base/utils.h
index 66a5699519..7160302daa 100644
--- a/libartbase/base/utils.h
+++ b/libartbase/base/utils.h
@@ -44,7 +44,16 @@ std::string PrettySize(uint64_t size_in_bytes);
// Splits a string using the given separator character into a vector of
// strings. Empty strings will be omitted.
-void Split(const std::string& s, char separator, std::vector<std::string>* result);
+template<typename StrIn, typename Str>
+void Split(const StrIn& s, char separator, std::vector<Str>* out_result);
+
+// Splits 's' on 'separator' and stores the non-empty pieces, in order, into the 'len'
+// elements starting at 'out_result'. Pieces that do not fit are dropped; elements that are
+// not needed are left unchanged.
+template<typename Str>
+void Split(const Str& s, char separator, size_t len, Str* out_result);
+
+// Same as the pointer-and-length overload, but fills a std::array. 's' is converted to
+// 'Str' before it is split.
+template<typename StrIn, typename Str, size_t kLen>
+void Split(const StrIn& s, char separator, std::array<Str, kLen>* out_result) {
+  // data() rather than &(*out_result)[0]: indexing element 0 of a zero-length array is
+  // undefined behaviour, whereas data() may always be called.
+  Split<Str>(Str(s), separator, kLen, out_result->data());
+}
// Returns the calling thread's tid. (The C libraries don't expose this.)
uint32_t GetTid();
diff --git a/libartbase/base/utils_test.cc b/libartbase/base/utils_test.cc
index f67ada261e..09705fe862 100644
--- a/libartbase/base/utils_test.cc
+++ b/libartbase/base/utils_test.cc
@@ -15,6 +15,7 @@
*/
#include "utils.h"
+#include "stl_util.h"
#include "gtest/gtest.h"
@@ -41,9 +42,13 @@ TEST_F(UtilsTest, PrettySize) {
EXPECT_EQ("512B", PrettySize(512));
}
+// Wraps a C string in a std::string_view and forwards to the templated Split, collecting
+// the pieces as std::string_view.
+void Split(const char* arr, char s, std::vector<std::string_view>* sv) {
+  const std::string_view text(arr);
+  Split<std::string_view>(text, s, sv);
+}
+
TEST_F(UtilsTest, Split) {
- std::vector<std::string> actual;
- std::vector<std::string> expected;
+ std::vector<std::string_view> actual;
+ std::vector<std::string_view> expected;
expected.clear();
@@ -115,4 +120,48 @@ TEST_F(UtilsTest, GetProcessStatus) {
EXPECT_EQ("<unknown>", GetProcessStatus("InvalidFieldName"));
}
+// Checks that splitting 'input' on 'separator' yields exactly the pieces in 'expected', in
+// order, and that the iterator reaches end() only after the last of them.
+static void ExpectSplitPieces(std::string_view input,
+                              char separator,
+                              const std::vector<std::string_view>& expected) {
+  auto range = SplitString(input, separator);
+  auto it = range.begin();
+  for (std::string_view piece : expected) {
+    // Fatal on purpose: if the range ran out early, stop here instead of going on to
+    // dereference the end iterator.
+    ASSERT_FALSE(it == range.end());
+    EXPECT_EQ(*it++, piece);
+  }
+  EXPECT_TRUE(it == range.end());
+}
+
+TEST_F(UtilsTest, StringSplit) {
+  // Leading, doubled and trailing separators each contribute an empty piece.
+  ExpectSplitPieces("[ab[c[[d[e[", '[', {"", "ab", "c", "", "d", "e", ""});
+}
+
+TEST_F(UtilsTest, StringSplit2) {
+  // No separator at either end: only the doubled separator yields an empty piece.
+  ExpectSplitPieces("ab[c[[d[e", '[', {"ab", "c", "", "d", "e"});
+}
+
+TEST_F(UtilsTest, StringSplit3) {
+  // An empty input still yields exactly one (empty) piece.
+  ExpectSplitPieces("", '[', {""});
+}
+
} // namespace art