diff options
author | 2021-02-08 17:46:15 -0800 | |
---|---|---|
committer | 2021-02-10 01:39:40 +0000 | |
commit | 60117aeeffda3d01a5314984694ae3d6d4588fc1 (patch) | |
tree | 12f5062c456de8b50418a8fb74c801c8ccc5b690 | |
parent | c7ac91b21d1a15c14e29d69ff02b48c485962b0d (diff) |
Improve string splitting
String splitting is something that we often have to do but our support
code for doing so is not the best. Add support for using
std::string_view in many circumstances and add support for making an
iterator of splits without allocation.
Test: ./test.py --host
Change-Id: I1b56b7e10926a064b64011326b508dd4af707df9
-rw-r--r-- | libartbase/base/stl_util.h | 54 | ||||
-rw-r--r-- | libartbase/base/utils.cc | 49 | ||||
-rw-r--r-- | libartbase/base/utils.h | 11 | ||||
-rw-r--r-- | libartbase/base/utils_test.cc | 53 |
4 files changed, 152 insertions, 15 deletions
diff --git a/libartbase/base/stl_util.h b/libartbase/base/stl_util.h index 4d4a686620..dfe994ed09 100644 --- a/libartbase/base/stl_util.h +++ b/libartbase/base/stl_util.h @@ -323,6 +323,60 @@ SafePrinter<Val> SafePrint(const Val* v) { return SafePrinter<Val>{v}; } +// Helper struct for iterating a split-string without allocation. It stores only a std::string_view, so the viewed characters must outlive the iterator and every segment it yields. +struct SplitStringIter : public std::iterator<std::forward_iterator_tag, std::string_view> { + public: + // Direct iterator constructor. The iteration state is only the current index. + // We use that with the split char and the full string to get the current and + // next segment. + SplitStringIter(size_t index, char split, std::string_view sv) + : cur_index_(index), split_on_(split), sv_(sv) {} + SplitStringIter(const SplitStringIter&) = default; + SplitStringIter(SplitStringIter&&) = default; + SplitStringIter& operator=(SplitStringIter&&) = default; + SplitStringIter& operator=(const SplitStringIter&) = default; + + SplitStringIter& operator++() { + size_t nxt = sv_.find(split_on_, cur_index_); + if (nxt == std::string_view::npos) { + cur_index_ = std::string_view::npos; + } else { + cur_index_ = nxt + 1; + } + return *this; + } + + SplitStringIter operator++(int) { + SplitStringIter ret(cur_index_, split_on_, sv_); + ++(*this); + return ret; + } + + bool operator==(const SplitStringIter& other) const { + return sv_ == other.sv_ && split_on_ == other.split_on_ && cur_index_== other.cur_index_; + } + + bool operator!=(const SplitStringIter& other) const { + return !(*this == other); + } + + typename std::string_view operator*() const { + return sv_.substr(cur_index_, sv_.substr(cur_index_).find(split_on_)); + } + + private: + size_t cur_index_; + char split_on_; + std::string_view sv_; +}; + +// Create an iteration range over the string 'sv' split at each 'target' occurrence. 
+// E.g.: SplitString(":foo::bar", ':') -> ["", "foo", "", "bar"] +inline IterationRange<SplitStringIter> SplitString(std::string_view sv, char target) { + return MakeIterationRange(SplitStringIter(0, target, sv), + SplitStringIter(std::string_view::npos, target, sv)); +} + } // namespace art #endif // ART_LIBARTBASE_BASE_STL_UTIL_H_ diff --git a/libartbase/base/utils.cc b/libartbase/base/utils.cc index 492e737f4e..ba62f30bdc 100644 --- a/libartbase/base/utils.cc +++ b/libartbase/base/utils.cc @@ -25,11 +25,13 @@ #include <fstream> #include <memory> +#include <string> #include "android-base/file.h" #include "android-base/stringprintf.h" #include "android-base/strings.h" +#include "base/stl_util.h" #include "bit_utils.h" #include "os.h" @@ -230,22 +232,45 @@ std::string PrettySize(uint64_t byte_count) { byte_count / kBytesPerUnit[i], kUnitStrings[i]); } -void Split(const std::string& s, char separator, std::vector<std::string>* result) { - const char* p = s.data(); - const char* end = p + s.size(); - while (p != end) { - if (*p == separator) { - ++p; - } else { - const char* start = p; - while (++p != end && *p != separator) { - // Skip to the next occurrence of the separator. 
- } - result->push_back(std::string(start, p - start)); +template <typename StrIn, typename Str> +void Split(const StrIn& s, char separator, std::vector<Str>* out_result) { + auto split = SplitString(std::string_view(s), separator); + for (std::string_view p : split) { + if (p.empty()) { + continue; } + out_result->push_back(Str(p)); } } +template void Split(const char *const& s, char separator, std::vector<std::string>* out_result); +template void Split(const std::string& s, char separator, std::vector<std::string>* out_result); +template void Split(const char *const& s, char separator, std::vector<std::string_view>* out_result); +template void Split(const std::string_view& s, + char separator, + std::vector<std::string_view>* out_result); + +template <typename Str> +void Split(const Str& s, char separator, size_t len, Str* out_result) { + Str* last = out_result + len; + auto split = SplitString(std::string_view(s), separator); + for (std::string_view p : split) { + if (p.empty()) { + continue; + } + if (out_result == last) { + return; + } + *out_result++ = Str(p); + } +} + +template void Split(const std::string& s, char separator, size_t len, std::string* out_result); +template void Split(const std::string_view& s, + char separator, + size_t len, + std::string_view* out_result); + void SetThreadName(const char* thread_name) { bool hasAt = false; bool hasDot = false; diff --git a/libartbase/base/utils.h b/libartbase/base/utils.h index 66a5699519..7160302daa 100644 --- a/libartbase/base/utils.h +++ b/libartbase/base/utils.h @@ -44,7 +44,16 @@ std::string PrettySize(uint64_t size_in_bytes); // Splits a string using the given separator character into a vector of // strings. Empty strings will be omitted. 
-void Split(const std::string& s, char separator, std::vector<std::string>* result); +template<typename StrIn, typename Str> +void Split(const StrIn& s, char separator, std::vector<Str>* out_result); + +template<typename Str> +void Split(const Str& s, char separator, size_t len, Str* out_result); + +template<typename StrIn, typename Str, size_t kLen> +void Split(const StrIn& s, char separator, std::array<Str, kLen>* out_result) { + Split<Str>(Str(s), separator, kLen, &((*out_result)[0])); +} // Returns the calling thread's tid. (The C libraries don't expose this.) uint32_t GetTid(); diff --git a/libartbase/base/utils_test.cc b/libartbase/base/utils_test.cc index f67ada261e..09705fe862 100644 --- a/libartbase/base/utils_test.cc +++ b/libartbase/base/utils_test.cc @@ -15,6 +15,7 @@ */ #include "utils.h" +#include "stl_util.h" #include "gtest/gtest.h" @@ -41,9 +42,13 @@ TEST_F(UtilsTest, PrettySize) { EXPECT_EQ("512B", PrettySize(512)); } +void Split(const char* arr, char s, std::vector<std::string_view>* sv) { + Split<std::string_view>(std::string_view(arr), s, sv); +} + TEST_F(UtilsTest, Split) { - std::vector<std::string> actual; - std::vector<std::string> expected; + std::vector<std::string_view> actual; + std::vector<std::string_view> expected; expected.clear(); @@ -115,4 +120,48 @@ TEST_F(UtilsTest, GetProcessStatus) { EXPECT_EQ("<unknown>", GetProcessStatus("InvalidFieldName")); } +TEST_F(UtilsTest, StringSplit) { + auto range = SplitString("[ab[c[[d[e[", '['); + auto it = range.begin(); + EXPECT_FALSE(it == range.end()); + EXPECT_EQ(*it++, ""); + EXPECT_FALSE(it == range.end()); + EXPECT_EQ(*it++, "ab"); + EXPECT_FALSE(it == range.end()); + EXPECT_EQ(*it++, "c"); + EXPECT_FALSE(it == range.end()); + EXPECT_EQ(*it++, ""); + EXPECT_FALSE(it == range.end()); + EXPECT_EQ(*it++, "d"); + EXPECT_FALSE(it == range.end()); + EXPECT_EQ(*it++, "e"); + EXPECT_FALSE(it == range.end()); + EXPECT_EQ(*it++, ""); + EXPECT_TRUE(it == range.end()); +} + +TEST_F(UtilsTest, 
StringSplit2) { + auto range = SplitString("ab[c[[d[e", '['); + auto it = range.begin(); + EXPECT_FALSE(it == range.end()); + EXPECT_EQ(*it++, "ab"); + EXPECT_FALSE(it == range.end()); + EXPECT_EQ(*it++, "c"); + EXPECT_FALSE(it == range.end()); + EXPECT_EQ(*it++, ""); + EXPECT_FALSE(it == range.end()); + EXPECT_EQ(*it++, "d"); + EXPECT_FALSE(it == range.end()); + EXPECT_EQ(*it++, "e"); + EXPECT_TRUE(it == range.end()); +} + +TEST_F(UtilsTest, StringSplit3) { + auto range = SplitString("", '['); + auto it = range.begin(); + EXPECT_FALSE(it == range.end()); + EXPECT_EQ(*it++, ""); + EXPECT_TRUE(it == range.end()); +} + } // namespace art |