Reimplement boost::split_iterator to make it less slow

boost::split_iterator type-erases the predicate, which makes it require
a virtual call per character (!) along with a heap allocation. As it
turns out we only ever need one predicate (comparing to a single
character), so replace it with a split_iterator that just does that.
This commit is contained in:
Thomas Goyne 2015-01-04 14:56:27 -08:00
parent d8bd9904d8
commit 427037a552
4 changed files with 80 additions and 23 deletions

View File

@ -1,4 +1,4 @@
// Copyright (c) 2013, Thomas Goyne <plorkyeran@aegisub.org>
// Copyright (c) 2015, Thomas Goyne <plorkyeran@aegisub.org>
//
// Permission to use, copy, modify, and distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
@ -14,32 +14,89 @@
//
// Aegisub Project http://www.aegisub.org/
#include <algorithm>
#include <cstddef>
#include <iterator>
#include <string>

#include <boost/algorithm/string/finder.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/range/iterator_range.hpp>
namespace agi {
typedef boost::iterator_range<std::string::const_iterator> StringRange;
/// Iterator over the pieces of [begin, end) that are separated by a single
/// delimiter value.
///
/// Dereferencing yields a boost::iterator_range pointing into the original
/// sequence; nothing is copied. An empty input produces no pieces at all,
/// while a delimiter at the very end of a non-empty input produces a final
/// empty piece.
///
/// A default-constructed split_iterator is the end iterator; all end
/// iterators compare equal to each other.
template<typename Iterator>
class split_iterator {
	// iterator_traits rather than Iterator::value_type so that raw pointers
	// are accepted as iterators too.
	using char_type = typename std::iterator_traits<Iterator>::value_type;

	bool is_end = false;
	// Value-initialized so that a default-constructed (end) iterator never
	// carries indeterminate members when it is copied.
	Iterator b{};    ///< Start of the current piece
	Iterator cur{};  ///< One past the current piece (a delimiter, or e)
	Iterator e{};    ///< End of the whole input
	char_type c{};   ///< Delimiter value

public:
	using iterator_category = std::forward_iterator_tag;
	using value_type = boost::iterator_range<Iterator>;
	using pointer = value_type*;
	using reference = value_type&;
	using difference_type = std::ptrdiff_t;

	/// Begin splitting [begin, end) on the delimiter c.
	/// If the input is empty the iterator starts out at the end position.
	split_iterator(Iterator begin, Iterator end, char_type c)
	: b(begin), cur(begin), e(end), c(c)
	{
		if (b != e)
			cur = std::find(b, e, c);
		else
			is_end = true;
	}

	/// Construct an end iterator.
	split_iterator() : is_end(true) { }

	/// True once the iterator has moved past the last piece.
	bool eof() const { return is_end; }

	/// The current piece, as a range into the original input.
	value_type operator*() const {
		return boost::make_iterator_range(b, cur);
	}

	/// End iterators are equal only to other end iterators; otherwise two
	/// iterators are equal when every position and the delimiter match.
	bool operator==(split_iterator const& it) const {
		if (is_end || it.is_end)
			return is_end && it.is_end;
		return b == it.b && cur == it.cur && e == it.e && c == it.c;
	}

	bool operator!=(split_iterator const& it) const {
		return !(*this == it);
	}

	/// Advance to the next piece, or to the end position if the current
	/// piece already reached the end of the input.
	split_iterator& operator++() {
		if (cur != e) {
			// cur sits on a delimiter: step over it. std::next keeps this
			// valid for forward iterators, matching iterator_category.
			b = std::next(cur);
			cur = std::find(b, e, c);
		}
		else {
			b = e;
			is_end = true;
		}
		return *this;
	}

	split_iterator operator++(int) {
		split_iterator tmp = *this;
		++*this;
		return tmp;
	}
};
/// Free begin() for split_iterator: the iterator acts as its own range, so
/// the first position is simply a copy of the iterator passed in.
template<typename Iterator>
split_iterator<Iterator> begin(split_iterator<Iterator> const& it) {
	split_iterator<Iterator> first(it);
	return first;
}
/// Free end() for split_iterator: the argument is ignored and a
/// default-constructed split_iterator is returned as the end position.
template<typename Iterator>
split_iterator<Iterator> end(split_iterator<Iterator> const&) {
	return {};
}
// Split `str` on the single delimiter value `delim` and return an iterator
// over the resulting pieces.
// NOTE(review): this span appears to carry both sides of the diff with the
// +/- markers lost. Presumably the boost::split_iterator signature/return
// pair directly below is the version being removed, and the split_iterator
// pair after it is the replacement -- confirm against the commit before
// treating this as compilable code.
template<typename Str, typename Char>
boost::split_iterator<typename Str::const_iterator> Split(Str const& str, Char delim) {
return boost::make_split_iterator(str, boost::token_finder([=](Char c) { return c == delim; }));
split_iterator<typename Str::const_iterator> Split(Str const& str, Char delim) {
return split_iterator<typename Str::const_iterator>(begin(str), end(str), delim);
}
/// Copy the characters covered by a StringRange into a new std::string.
static inline std::string str(StringRange const& r) {
	std::string copy(r.begin(), r.end());
	return copy;
}
}
// Free begin()/end() overloads placed in boost::algorithm, next to
// split_iterator, so the iterator can be used as its own range.
namespace boost {
namespace algorithm {

/// The first position is the iterator that was passed in.
template<typename Iterator>
split_iterator<Iterator> begin(split_iterator<Iterator> it) {
	return it;
}

/// The end position is a default-constructed split_iterator; the argument
/// is ignored.
template<typename Iterator>
split_iterator<Iterator> end(split_iterator<Iterator>) {
	split_iterator<Iterator> last;
	return last;
}

}
}

View File

@ -73,7 +73,7 @@ AssDialogue::~AssDialogue () { }
class tokenizer {
agi::StringRange str;
boost::split_iterator<agi::StringRange::const_iterator> pos;
agi::split_iterator<agi::StringRange::const_iterator> pos;
public:
tokenizer(agi::StringRange const& str) : str(str) , pos(agi::Split(str, ',')) { }

View File

@ -54,7 +54,7 @@ AssEntryGroup AssStyle::Group() const { return AssEntryGroup::STYLE; }
namespace {
class parser {
boost::split_iterator<agi::StringRange::const_iterator> pos;
agi::split_iterator<agi::StringRange::const_iterator> pos;
std::string next_tok() {
if (pos.eof())

View File

@ -41,7 +41,7 @@ TEST(lagi_split, does_not_copy_input) {
}
auto rng = agi::Split(str, 'e');
EXPECT_EQ(str.begin(), rng->begin());
EXPECT_EQ(str.end(), std::next(rng)->end());
EXPECT_EQ(str.begin(), begin(*rng));
EXPECT_EQ(str.end(), end(*std::next(rng)));
}