string: add tokeniser

This commit is contained in:
Danny Robson 2016-03-17 18:13:19 +11:00
parent 28d44593d8
commit 6a1ea8ff29
4 changed files with 171 additions and 4 deletions

View File

@ -399,6 +399,7 @@ TEST_BIN = \
test/sha1 \
test/sha2 \
test/signal \
test/string \
test/stringid \
test/strongdef \
test/tuple \

View File

@ -11,15 +11,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright 2011 Danny Robson <danny@nerdcruft.net>
* Copyright 2011-2016 Danny Robson <danny@nerdcruft.net>
*/
#include "./string.hpp"
#include "./cast.hpp"
#include <cstring>
using util::tokeniser;
///////////////////////////////////////////////////////////////////////////////
// Returns true when `str` starts with `prefix`.
//
// Only the first strlen (prefix) bytes of the two strings are compared, so
// an empty prefix matches every string.
//
// TODO: Horribly inefficient, but God help you if you're relying on this
// being efficient in the first place.
bool
@ -27,3 +28,89 @@ strbegins (const char *restrict str,
const char *restrict prefix) {
return 0 == strncmp (prefix, str, strlen (prefix));
}
///////////////////////////////////////////////////////////////////////////////
// Set up a tokeniser over `_value`, splitting wherever `_separator` occurs.
//
// The string is bound by reference rather than copied, so the caller's
// string must remain alive for as long as this tokeniser is used.
tokeniser::tokeniser (const string_type &_value, value_type _separator):
    m_value     (_value),
    m_separator (_separator)
{ }
//-----------------------------------------------------------------------------
// Build an iterator positioned on the first token. The iterator is handed
// the whole string; it finds the end of the first token itself.
tokeniser::iterator
tokeniser::cbegin (void) const
{
    const iterator::range_type whole { m_value.cbegin (), m_value.cend () };
    return iterator (whole, m_separator);
}
//-----------------------------------------------------------------------------
// Build the past-the-end iterator: an iterator over the empty range that
// starts and stops at the end of the source string.
tokeniser::iterator
tokeniser::cend (void) const
{
    const auto last = m_value.cend ();
    const iterator::range_type nothing { last, last };
    return iterator (nothing, m_separator);
}
///////////////////////////////////////////////////////////////////////////////
// Position the iterator on the first token of `_range`.
//
// The current token runs from the start of `_range` up to (not including)
// the first `_separator`, or to the end of `_range` if there is none. The end
// of `_range` is remembered so later increments know where to stop.
tokeniser::iterator::iterator (range_type _range, value_type _separator):
    m_separator (_separator),
    m_range (
        _range.cbegin (),
        std::find (_range.cbegin (), _range.cend (), _separator)
    ),
    m_end (_range.cend ())
{ }
//-----------------------------------------------------------------------------
// Post-increment: move on to the next token, returning a copy of the
// iterator as it was before the move.
tokeniser::iterator
tokeniser::iterator::operator++ (int)
{
    iterator previous = *this;
    operator++ ();
    return previous;
}
//-----------------------------------------------------------------------------
// Pre-increment: advance to the token following the current one.
//
// If the current token stops short of the end of the input it must have been
// terminated by a separator, which is stepped over. The new token then runs
// up to the next separator, or to the end of the input. Incrementing an
// iterator whose token already stops at the end leaves it on the empty range
// at the end of the input.
//
// NOTE(review): when the input finishes with a separator, the final (empty)
// token is also the empty range at the end of the input, which is the state
// tokeniser::cend constructs. It therefore presumably compares equal to
// cend () before it has been visited -- confirm against util::view's
// equality semantics.
tokeniser::iterator&
tokeniser::iterator::operator++ (void)
{
    auto cursor = m_range.cend ();

    if (cursor != m_end) {
        CHECK_EQ (*m_range.cend (), m_separator);
        ++cursor;
    }

    const auto stop = std::find (cursor, m_end, m_separator);
    m_range = { cursor, stop };

    return *this;
}
//-----------------------------------------------------------------------------
// Dereference: yield a copy of the view covering the current token. The
// separator that terminated the token is not part of the view.
tokeniser::iterator::range_type
tokeniser::iterator::operator* (void) const
{
    return m_range;
}
//-----------------------------------------------------------------------------
// Two iterators are equal when they cover the same token range and split on
// the same separator. The remembered end-of-input position is not compared.
bool
tokeniser::iterator::operator== (const iterator &rhs) const
{
    if (!(m_range == rhs.m_range))
        return false;

    return m_separator == rhs.m_separator;
}

View File

@ -11,15 +11,59 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright 2011 Danny Robson <danny@nerdcruft.net>
* Copyright 2011-2016 Danny Robson <danny@nerdcruft.net>
*/
#ifndef __UTIL_STRING_HPP
#define __UTIL_STRING_HPP
#include "./view.hpp"
/// Tests whether the NUL-terminated string `str` begins with the
/// NUL-terminated string `prefix`.
bool
strbegins(const char *restrict str,
const char *restrict prefix);
namespace util {
    /// Splits a string into the runs of characters that lie between
    /// occurrences of a single separator character.
    ///
    /// The tokeniser keeps a reference to the string it was given; it does
    /// not copy it. The string must therefore outlive the tokeniser and any
    /// iterators or views obtained from it.
    struct tokeniser {
    public:
        using string_type = std::string;
        using value_type  = string_type::value_type;

        /// Tokenise `value`, splitting at each `separator`.
        tokeniser (const std::string &value, char separator);

        /// Temporaries are rejected: only a reference is stored, so binding
        /// to an rvalue would leave the tokeniser referring to a destroyed
        /// string.
        tokeniser (std::string &&value, char separator) = delete;

        /// Forward iterator over the tokens. Dereferencing yields a view of
        /// the current token within the source string.
        struct iterator {
        public:
            using range_type = util::view<string_type::const_iterator>;

            /// Position on the first token of `range`.
            iterator (range_type range, char separator);

            iterator  operator++ (int);
            iterator& operator++ (void);

            range_type operator* (void) const;

            bool operator== (const iterator&) const;

            /// Defined inline in terms of operator== so that conventional
            /// `it != end` loops and range-for can be used.
            bool operator!= (const iterator &rhs) const
            {
                return !(*this == rhs);
            }

        private:
            const value_type m_separator;           // character tokens are split on
            range_type m_range;                     // extent of the current token
            string_type::const_iterator m_end;      // end of the whole input
        };

        iterator cbegin (void) const;
        iterator cend   (void) const;

        /// begin/end are defined here, forwarding to the const variants, so
        /// that the declarations always have a matching definition.
        iterator begin (void) const { return cbegin (); }
        iterator end   (void) const { return cend (); }

    private:
        const string_type &m_value;
        const value_type m_separator;
    };
}
#endif // __UTIL_STRING_HPP

35
test/string.cpp Normal file
View File

@ -0,0 +1,35 @@
#include <cruft/util/tap.hpp>
#include <cruft/util/string.hpp>
#include <cruft/util/types.hpp>
// Exercises util::tokeniser over a comma separated string that contains an
// embedded NUL, an empty field in the middle, and a trailing empty field.
int
main (int, char**)
{
    util::TAP::logger tap;

    const char csv[] = "\0,a,123,,this is a test,";

    // Expected tokens, in the order the tokeniser should produce them.
    const std::string values[] = {
        { "\0", 1 },
        { "a" },
        { "123" },
        { "" },
        { "this is a test" },
        { "" }
    };

    // Build the string from the array bounds rather than as a C string so the
    // leading embedded NUL survives. The final element of the array is the
    // literal's implicit terminator; it is excluded so that the last token
    // really is the empty string that `values` expects.
    std::string str (std::cbegin (csv), std::cend (csv) - 1);
    auto tok = util::tokeniser (str, ',');

    auto t_cursor = tok.cbegin ();
    auto v_cursor = std::cbegin (values);

    // Labels are listed in the same order as the entries of `values`.
    tap.expect_eq (*t_cursor++, *v_cursor++, "tokeniser, embedded null");
    tap.expect_eq (*t_cursor++, *v_cursor++, "tokeniser, single letter");
    tap.expect_eq (*t_cursor++, *v_cursor++, "tokeniser, three digits");
    tap.expect_eq (*t_cursor++, *v_cursor++, "tokeniser, empty string");
    tap.expect_eq (*t_cursor++, *v_cursor++, "tokeniser, string with spaces");
    tap.expect_eq (*t_cursor++, *v_cursor++, "tokeniser, trailing empty");

    tap.expect_eq (t_cursor, tok.cend (), "tokeniser iterator at end");

    return tap.status ();
}