string: handle trailing/leading delimiters in tokeniser

This commit is contained in:
Danny Robson 2021-12-17 10:32:19 +10:00
parent 462e2e6ad1
commit 2d0324f17a
2 changed files with 73 additions and 32 deletions

View File

@ -116,7 +116,7 @@ namespace cruft {
/// It is up to the constructor to ensure the lifetime of the string.
/// This class simply provides an interface for iteration and doesn't
/// concern itself with lifetimes.
template <typename Iterator>
template <std::forward_iterator Iterator>
struct tokeniser {
public:
using value_type = view<Iterator>;
@ -132,16 +132,16 @@ namespace cruft {
m_separator (_separator)
{ ; }
struct iterator {
public:
using iterator_category = std::forward_iterator_tag;
using value_type = tokeniser::value_type;
using difference_type = std::size_t;
using difference_type = std::iter_difference_t<Iterator>;
using pointer = value_type*;
using reference = value_type&;
iterator operator++ (int)
iterator
operator++ (int)
{
iterator res(*this);
++*this;
@ -150,17 +150,14 @@ namespace cruft {
iterator& operator++ (void)&
{
auto newend = m_range.cend ();
if (newend != m_end) {
CHECK_EQ (*m_range.cend (), m_separator);
newend++;
if (m_data.end () == m_end) {
m_data = { m_data.end (), m_data.end () };
m_base = m_end;
return *this;
}
m_range = {
newend,
std::find (newend, m_end, m_separator)
};
m_base = m_data.end ();
m_data = { m_data.end () + 1, std::find (m_data.end () + 1, m_end, m_separator) };
return *this;
}
@ -169,34 +166,42 @@ namespace cruft {
return std::next (*this, count);
}
auto const& operator* (void) const& { return m_range; }
auto const* operator-> (void) const& { return &m_range; }
/// Dereference: yields a const reference to the token view currently
/// stored in this iterator (`m_data`). No copy of the view is made.
auto const&
operator* (void) const&
{
return m_data;
}
/// Member access: yields a pointer to the token view currently stored in
/// this iterator (`m_data`), so members of the view can be reached with
/// `it->...`.
auto
operator-> (void) const&
{
return &m_data;
}
bool operator== (const iterator &rhs) const
{
return m_range == rhs.m_range && m_separator == rhs.m_separator;
CHECK_EQ (m_separator, rhs.m_separator);
return m_base == rhs.m_base;
}
bool operator!= (const iterator &rhs) const
bool operator!= (iterator const &rhs) const
{
return !(*this == rhs);
}
/// Builds an iterator positioned on the first token of `_range`.
///
/// \param _range      the full span of elements to be tokenised
/// \param _separator  the element value that delimits tokens
///
/// The first token runs from the start of `_range` up to (but excluding)
/// the first occurrence of `_separator`, or to the end of `_range` when no
/// separator is present. `m_base` records where that token starts and
/// `m_end` records the end of the whole range.
iterator (value_type _range, element_type _separator)
: m_separator (_separator)
, m_base (_range.begin ())
, m_data (_range.begin (), std::find (_range.begin (), _range.end (), _separator))
, m_end (_range.end ())
{ ; }
private:
iterator (value_type _range, element_type _separator):
m_separator (_separator),
m_range {
_range.cbegin (),
std::find (_range.cbegin (), _range.cend (), _separator)
},
m_end (_range.cend ())
{ ; }
element_type m_separator;
value_type m_range;
Iterator m_end;
friend tokeniser;
Iterator m_base;
view<Iterator> m_data;
Iterator m_end;
};

View File

@ -66,8 +66,43 @@ test_position (cruft::TAP::logger &tap)
///////////////////////////////////////////////////////////////////////////////
void
test_tokeniser (cruft::TAP::logger &tap)
/// Checks that `cruft::tokeniser` splits short, space-delimited strings into
/// the expected tokens, covering leading, trailing, and consecutive
/// delimiters as well as the empty string.
///
/// Each case tokenises `src` on ' ', collects every token as a
/// `std::string_view`, and reports the comparison against `parts` to `tap`
/// under a name built from `message`.
static void
test_short_tokeniser (cruft::TAP::logger &tap)
{
    struct {
        char const *src;
        std::vector<std::string_view> parts;
        char const *message;
    } TESTS[] = {
        { "foo",     { "foo" },        "no separator" },
        { "foo bar", { "foo", "bar" }, "one separator" },
        { " foo",    { "", "foo" },    "leading separator" },
        { "foo ",    { "foo", "" },    "trailing separator" },

        // An empty input is expected to produce no tokens at all.
        { "",        { },              "no separators" },

        // N delimiters bound N+1 (here empty) tokens, so the "double" case
        // needs two spaces; with a single space it would duplicate the
        // "single" input while expecting a different token count.
        { " ",       { "", "" },       "only separators, single" },
        { "  ",      { "", "", "" },   "only separators, double" },
    };

    for (auto const &[src, expected, message]: TESTS) {
        std::vector<std::string_view> computed;

        cruft::tokeniser const tok (src, ' ');

        // Convert each token view into a string_view so the result can be
        // compared directly against the expected parts.
        std::transform (
            std::begin (tok),
            std::end (tok),
            std::back_inserter (computed),
            [] (auto const i)
            {
                return std::string_view (i.begin (), i.size ());
            });

        tap.expect_eq (expected, computed, "tokeniser: {}", message);
    }
}
//-----------------------------------------------------------------------------
static void
test_long_tokeniser (cruft::TAP::logger &tap)
{
// the string_literal prefix is required to (easily) construct a string
// with an internal null character.
@ -221,7 +256,8 @@ main (int, char**)
cruft::TAP::logger tap;
test_transforms (tap);
test_tokeniser (tap);
test_short_tokeniser (tap);
test_long_tokeniser (tap);
test_position (tap);
test_contains (tap);
test_comparator_less (tap);