string: handle trailing/leading delimiters in tokeniser
This commit is contained in:
parent
462e2e6ad1
commit
2d0324f17a
63
string.hpp
63
string.hpp
@ -116,7 +116,7 @@ namespace cruft {
|
||||
/// It is up to the constructor to ensure the lifetime of the string.
|
||||
/// This class simply provides an interface for iteration and doesn't
|
||||
/// concern itself with lifetimes.
|
||||
template <typename Iterator>
|
||||
template <std::forward_iterator Iterator>
|
||||
struct tokeniser {
|
||||
public:
|
||||
using value_type = view<Iterator>;
|
||||
@ -132,16 +132,16 @@ namespace cruft {
|
||||
m_separator (_separator)
|
||||
{ ; }
|
||||
|
||||
|
||||
struct iterator {
|
||||
public:
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
using value_type = tokeniser::value_type;
|
||||
using difference_type = std::size_t;
|
||||
using difference_type = std::iter_difference_t<Iterator>;
|
||||
using pointer = value_type*;
|
||||
using reference = value_type&;
|
||||
|
||||
iterator operator++ (int)
|
||||
iterator
|
||||
operator++ (int)
|
||||
{
|
||||
iterator res(*this);
|
||||
++*this;
|
||||
@ -150,17 +150,14 @@ namespace cruft {
|
||||
|
||||
/// Advance to the next token.
///
/// If the current token already finishes at the end of the underlying
/// range there is nothing left to yield: the view collapses to an empty
/// range at that position and m_base is set to m_end.
///
/// Otherwise the current token stops on a separator. m_base records the
/// position of that separator, and the new token spans from the element
/// after it up to the next separator (or the end of the range).
///
/// NOTE(review): operator== compares m_base only. For input that starts
/// with a separator the first (empty) token and the second token both
/// have m_base at the first element, so those two iterators compare
/// equal -- confirm this is acceptable.
iterator& operator++ (void)&
{
    if (m_data.end () == m_end) {
        m_data = { m_data.end (), m_data.end () };
        m_base = m_end;
        return *this;
    }

    // Step with std::next rather than `+ 1`: Iterator is only required
    // to model std::forward_iterator, which does not provide operator+.
    auto const separator = m_data.end ();
    auto const first = std::next (separator);

    m_base = separator;
    m_data = { first, std::find (first, m_end, m_separator) };
    return *this;
}
|
||||
|
||||
@ -169,34 +166,42 @@ namespace cruft {
|
||||
return std::next (*this, count);
|
||||
}
|
||||
|
||||
auto const& operator* (void) const& { return m_range; }
|
||||
auto const* operator-> (void) const& { return &m_range; }
|
||||
auto const&
|
||||
operator* (void) const&
|
||||
{
|
||||
return m_data;
|
||||
}
|
||||
|
||||
auto
|
||||
operator-> (void) const&
|
||||
{
|
||||
return &m_data;
|
||||
}
|
||||
|
||||
bool operator== (const iterator &rhs) const
|
||||
{
|
||||
return m_range == rhs.m_range && m_separator == rhs.m_separator;
|
||||
CHECK_EQ (m_separator, rhs.m_separator);
|
||||
return m_base == rhs.m_base;
|
||||
}
|
||||
|
||||
bool operator!= (const iterator &rhs) const
|
||||
bool operator!= (iterator const &rhs) const
|
||||
{
|
||||
return !(*this == rhs);
|
||||
}
|
||||
|
||||
iterator (value_type _range, element_type _separator)
|
||||
: m_separator (_separator)
|
||||
, m_base (_range.begin ())
|
||||
, m_data (_range.begin (), std::find (_range.begin (), _range.end (), _separator))
|
||||
, m_end (_range.end ())
|
||||
{ ; }
|
||||
|
||||
private:
|
||||
iterator (value_type _range, element_type _separator):
|
||||
m_separator (_separator),
|
||||
m_range {
|
||||
_range.cbegin (),
|
||||
std::find (_range.cbegin (), _range.cend (), _separator)
|
||||
},
|
||||
m_end (_range.cend ())
|
||||
{ ; }
|
||||
|
||||
element_type m_separator;
|
||||
value_type m_range;
|
||||
Iterator m_end;
|
||||
|
||||
friend tokeniser;
|
||||
Iterator m_base;
|
||||
view<Iterator> m_data;
|
||||
Iterator m_end;
|
||||
};
|
||||
|
||||
|
||||
|
@ -66,8 +66,43 @@ test_position (cruft::TAP::logger &tap)
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
void
|
||||
test_tokeniser (cruft::TAP::logger &tap)
|
||||
static void
|
||||
test_short_tokeniser (cruft::TAP::logger &tap)
|
||||
{
|
||||
struct {
|
||||
char const *src;
|
||||
std::vector<std::string_view> parts;
|
||||
char const *message;
|
||||
} TESTS[] = {
|
||||
{ "foo", { "foo" }, "no separator" },
|
||||
{ "foo bar", { "foo", "bar" }, "one separator" },
|
||||
{ " foo", { "", "foo" }, "leading separator" },
|
||||
{ "foo ", { "foo", "" }, "trailing separator" },
|
||||
{ "", { }, "no separators" },
|
||||
{ " ", { "", "" }, "only separators, single" },
|
||||
{ " ", { "", "", "" }, "only separators, double" },
|
||||
};
|
||||
|
||||
for (auto const &[src, expected, message]: TESTS) {
|
||||
std::vector<std::string_view> computed;
|
||||
cruft::tokeniser const tok (src, ' ');
|
||||
std::transform (
|
||||
std::begin (tok),
|
||||
std::end (tok),
|
||||
std::back_inserter (computed),
|
||||
[] (auto const i)
|
||||
{
|
||||
return std::string_view (i.begin (), i.size ());
|
||||
});
|
||||
|
||||
tap.expect_eq (expected, computed, "tokeniser: {}", message);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
static void
|
||||
test_long_tokeniser (cruft::TAP::logger &tap)
|
||||
{
|
||||
// the string_literal prefix is required to (easily) construct a string
|
||||
// with an internal null character.
|
||||
@ -221,7 +256,8 @@ main (int, char**)
|
||||
cruft::TAP::logger tap;
|
||||
|
||||
test_transforms (tap);
|
||||
test_tokeniser (tap);
|
||||
test_short_tokeniser (tap);
|
||||
test_long_tokeniser (tap);
|
||||
test_position (tap);
|
||||
test_contains (tap);
|
||||
test_comparator_less (tap);
|
||||
|
Loading…
Reference in New Issue
Block a user