string: handle trailing/leading delimiters in tokeniser

This commit is contained in:
Danny Robson 2021-12-17 10:32:19 +10:00
parent 462e2e6ad1
commit 2d0324f17a
2 changed files with 73 additions and 32 deletions

View File

@ -116,7 +116,7 @@ namespace cruft {
/// It is up to the constructor to ensure the lifetime of the string. /// It is up to the constructor to ensure the lifetime of the string.
/// This class simply provides an interface for iteration and doesn't /// This class simply provides an interface for iteration and doesn't
/// concern itself with lifetimes. /// concern itself with lifetimes.
template <typename Iterator> template <std::forward_iterator Iterator>
struct tokeniser { struct tokeniser {
public: public:
using value_type = view<Iterator>; using value_type = view<Iterator>;
@ -132,16 +132,16 @@ namespace cruft {
m_separator (_separator) m_separator (_separator)
{ ; } { ; }
struct iterator { struct iterator {
public: public:
using iterator_category = std::forward_iterator_tag; using iterator_category = std::forward_iterator_tag;
using value_type = tokeniser::value_type; using value_type = tokeniser::value_type;
using difference_type = std::size_t; using difference_type = std::iter_difference_t<Iterator>;
using pointer = value_type*; using pointer = value_type*;
using reference = value_type&; using reference = value_type&;
iterator operator++ (int) iterator
operator++ (int)
{ {
iterator res(*this); iterator res(*this);
++*this; ++*this;
@ -150,17 +150,14 @@ namespace cruft {
/// Advance to the next token and return this iterator.
///
/// If the current token already reaches the end of the underlying range
/// there is nothing left to tokenise: the token becomes the empty range
/// at `m_end` and `m_base` is set to `m_end`.
///
/// Otherwise the element at the end of the current token is a separator.
/// Its position is recorded in `m_base`, and the new token spans from the
/// element after it up to the next separator (or the end of the range).
iterator& operator++ (void)&
{
    auto const stop = m_data.end ();

    if (stop == m_end) {
        m_data = { stop, stop };
        m_base = m_end;
        return *this;
    }

    m_base = stop;

    // Use std::next rather than `+ 1`: Iterator is only constrained to be
    // a forward iterator, which is not required to support arithmetic.
    auto const start = std::next (stop);
    m_data = { start, std::find (start, m_end, m_separator) };

    return *this;
}
@ -169,34 +166,42 @@ namespace cruft {
return std::next (*this, count); return std::next (*this, count);
} }
/// Dereference: a const reference to the token currently pointed at.
auto const&
operator* (void) const&
{
    auto const &token = m_data;
    return token;
}
/// Member access: a pointer to the token currently pointed at.
auto
operator-> (void) const&
{
    auto const *token = &m_data;
    return token;
}
bool operator== (const iterator &rhs) const bool operator== (const iterator &rhs) const
{ {
return m_range == rhs.m_range && m_separator == rhs.m_separator; CHECK_EQ (m_separator, rhs.m_separator);
return m_base == rhs.m_base;
} }
bool operator!= (const iterator &rhs) const bool operator!= (iterator const &rhs) const
{ {
return !(*this == rhs); return !(*this == rhs);
} }
/// Construct an iterator positioned on the first token of `_range`.
///
/// \param _range      the range of elements to be tokenised
/// \param _separator  the element value that delimits tokens
///
/// After construction:
///  - `m_base` is the start of `_range`,
///  - `m_data` spans from the start of `_range` up to the first element
///    equal to `_separator`, or the whole range if there is none,
///  - `m_end` is the end of `_range`.
///
/// Passing an empty range gives an empty token with `m_base` equal to
/// `m_end`.
iterator (value_type _range, element_type _separator)
: m_separator (_separator)
, m_base (_range.begin ())
, m_data (_range.begin (), std::find (_range.begin (), _range.end (), _separator))
, m_end (_range.end ())
{ ; }
private: private:
iterator (value_type _range, element_type _separator):
m_separator (_separator),
m_range {
_range.cbegin (),
std::find (_range.cbegin (), _range.cend (), _separator)
},
m_end (_range.cend ())
{ ; }
element_type m_separator; element_type m_separator;
value_type m_range;
Iterator m_end;
friend tokeniser; Iterator m_base;
view<Iterator> m_data;
Iterator m_end;
}; };

View File

@ -66,8 +66,43 @@ test_position (cruft::TAP::logger &tap)
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
void static void
test_tokeniser (cruft::TAP::logger &tap) test_short_tokeniser (cruft::TAP::logger &tap)
{
struct {
char const *src;
std::vector<std::string_view> parts;
char const *message;
} TESTS[] = {
{ "foo", { "foo" }, "no separator" },
{ "foo bar", { "foo", "bar" }, "one separator" },
{ " foo", { "", "foo" }, "leading separator" },
{ "foo ", { "foo", "" }, "trailing separator" },
{ "", { }, "no separators" },
{ " ", { "", "" }, "only separators, single" },
{ " ", { "", "", "" }, "only separators, double" },
};
for (auto const &[src, expected, message]: TESTS) {
std::vector<std::string_view> computed;
cruft::tokeniser const tok (src, ' ');
std::transform (
std::begin (tok),
std::end (tok),
std::back_inserter (computed),
[] (auto const i)
{
return std::string_view (i.begin (), i.size ());
});
tap.expect_eq (expected, computed, "tokeniser: {}", message);
}
}
//-----------------------------------------------------------------------------
static void
test_long_tokeniser (cruft::TAP::logger &tap)
{ {
// the string_literal prefix is required to (easily) construct a string // the string_literal prefix is required to (easily) construct a string
// with an internal null character. // with an internal null character.
@ -221,7 +256,8 @@ main (int, char**)
cruft::TAP::logger tap; cruft::TAP::logger tap;
test_transforms (tap); test_transforms (tap);
test_tokeniser (tap); test_short_tokeniser (tap);
test_long_tokeniser (tap);
test_position (tap); test_position (tap);
test_contains (tap); test_contains (tap);
test_comparator_less (tap); test_comparator_less (tap);