From 2d0324f17a555a454004e8e36dfe9378506bcab5 Mon Sep 17 00:00:00 2001 From: Danny Robson Date: Fri, 17 Dec 2021 10:32:19 +1000 Subject: [PATCH] string: handle trailing/leading delimiters in tokeniser --- string.hpp | 63 ++++++++++++++++++++++++++----------------------- test/string.cpp | 42 ++++++++++++++++++++++++++++++--- 2 files changed, 73 insertions(+), 32 deletions(-) diff --git a/string.hpp b/string.hpp index d4b85403..18e5cf19 100644 --- a/string.hpp +++ b/string.hpp @@ -116,7 +116,7 @@ namespace cruft { /// It is up to the constructor to ensure the lifetime of the string. /// This class simply provides an interface for iteration and doesn't /// concern itself with lifetimes. - template + template struct tokeniser { public: using value_type = view; @@ -132,16 +132,16 @@ namespace cruft { m_separator (_separator) { ; } - struct iterator { public: using iterator_category = std::forward_iterator_tag; using value_type = tokeniser::value_type; - using difference_type = std::size_t; + using difference_type = std::iter_difference_t; using pointer = value_type*; using reference = value_type&; - iterator operator++ (int) + iterator + operator++ (int) { iterator res(*this); ++*this; @@ -150,17 +150,14 @@ namespace cruft { iterator& operator++ (void)& { - auto newend = m_range.cend (); - if (newend != m_end) { - CHECK_EQ (*m_range.cend (), m_separator); - newend++; + if (m_data.end () == m_end) { + m_data = { m_data.end (), m_data.end () }; + m_base = m_end; + return *this; } - m_range = { - newend, - std::find (newend, m_end, m_separator) - }; - + m_base = m_data.end (); + m_data = { m_data.end () + 1, std::find (m_data.end () + 1, m_end, m_separator) }; return *this; } @@ -169,34 +166,42 @@ namespace cruft { return std::next (*this, count); } - auto const& operator* (void) const& { return m_range; } - auto const* operator-> (void) const& { return &m_range; } + auto const& + operator* (void) const& + { + return m_data; + } + + auto + operator-> (void) const& + { + return &m_data; + } bool operator== (const iterator &rhs) const { - return m_range == rhs.m_range && m_separator == rhs.m_separator; + CHECK_EQ (m_separator, rhs.m_separator); + return m_base == rhs.m_base; } - bool operator!= (const iterator &rhs) const + bool operator!= (iterator const &rhs) const { return !(*this == rhs); } + iterator (value_type _range, element_type _separator) + : m_separator (_separator) + , m_base (_range.begin ()) + , m_data (_range.begin (), std::find (_range.begin (), _range.end (), _separator)) + , m_end (_range.end ()) + { ; } + private: - iterator (value_type _range, element_type _separator): - m_separator (_separator), - m_range { - _range.cbegin (), - std::find (_range.cbegin (), _range.cend (), _separator) - }, - m_end (_range.cend ()) - { ; } - element_type m_separator; - value_type m_range; - Iterator m_end; - friend tokeniser; + Iterator m_base; + view m_data; + Iterator m_end; }; diff --git a/test/string.cpp b/test/string.cpp index db00c9cf..ee5c959f 100644 --- a/test/string.cpp +++ b/test/string.cpp @@ -66,8 +66,43 @@ test_position (cruft::TAP::logger &tap) /////////////////////////////////////////////////////////////////////////////// -void -test_tokeniser (cruft::TAP::logger &tap) +static void +test_short_tokeniser (cruft::TAP::logger &tap) +{ + struct { + char const *src; + std::vector parts; + char const *message; + } TESTS[] = { + { "foo", { "foo" }, "no separator" }, + { "foo bar", { "foo", "bar" }, "one separator" }, + { " foo", { "", "foo" }, "leading separator" }, + { "foo ", { "foo", "" }, "trailing separator" }, + { "", { }, "no separators" }, + { " ", { "", "" }, "only separators, single" }, + { " ", { "", "", "" }, "only separators, double" }, + }; + + for (auto const &[src, expected, message]: TESTS) { + std::vector computed; + cruft::tokeniser const tok (src, ' '); + std::transform ( + std::begin (tok), + std::end (tok), + std::back_inserter (computed), + [] (auto const i) + { + return std::string_view (i.begin (), i.size ()); + }); + + tap.expect_eq (expected, computed, "tokeniser: {}", message); + } +} + + +//----------------------------------------------------------------------------- +static void +test_long_tokeniser (cruft::TAP::logger &tap) { // the string_literal prefix is required to (easily) construct a string // with an internal null character. @@ -221,7 +256,8 @@ main (int, char**) cruft::TAP::logger tap; test_transforms (tap); - test_tokeniser (tap); + test_short_tokeniser (tap); + test_long_tokeniser (tap); test_position (tap); test_contains (tap); test_comparator_less (tap);