libcruft-util/string.hpp

274 lines
7.8 KiB
C++
Raw Normal View History

2011-08-12 00:25:59 +10:00
/*
2018-08-04 15:14:06 +10:00
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
2011-08-12 00:25:59 +10:00
*
* Copyright 2011-2018 Danny Robson <danny@nerdcruft.net>
2011-08-12 00:25:59 +10:00
*/
#pragma once
2011-08-12 00:25:59 +10:00
2019-05-02 11:03:22 +10:00
#include "ascii.hpp"
#include "debug.hpp"
#include "view.hpp"
2016-03-17 18:13:19 +11:00
2019-05-01 12:38:55 +10:00
#include <string>
#include <type_traits>
namespace cruft {
/// Produce a `std::string` from a wide-character C string.
///
/// Only the declaration is visible here; judging by the name the result is
/// presumably the UTF-8 encoding of the input -- confirm against the
/// implementation.
std::string to_utf8 (const wchar_t*);
/// Overload of `to_utf8` taking a `std::wstring` rather than a raw pointer.
std::string to_utf8 (const std::wstring&);
}
2011-08-12 00:25:59 +10:00
/// Declared at global scope (outside `namespace cruft`) and defined
/// elsewhere.
///
/// NOTE(review): judging by the name this presumably reports whether `str`
/// starts with `prefix` -- confirm against the implementation. `restrict`
/// is not a C++ keyword, so it is presumably a project macro -- confirm
/// where it is defined.
bool
strbegins(const char *restrict str,
const char *restrict prefix);
2016-03-17 18:13:19 +11:00
namespace cruft {
2019-02-20 15:51:54 +11:00
/// Provides an iterator interface over a string, breaking at each
/// occurence of a specific character.
///
/// It is up to the constructor to ensure the lifetime of the string.
/// This class simply provides an interface for iteration and doesn't
/// concern itself with lifetimes.
2016-03-18 11:08:12 +11:00
template <typename Iterator>
2016-03-17 18:13:19 +11:00
struct tokeniser {
public:
2016-03-18 11:08:12 +11:00
using value_type = typename std::iterator_traits<Iterator>::value_type;
using range_type = view<Iterator>;
2016-03-17 18:13:19 +11:00
2019-02-20 15:51:54 +11:00
tokeniser (cruft::view<Iterator,Iterator> _range, value_type _separator):
m_range (_range),
m_separator (_separator)
{ ; }
2016-03-17 18:13:19 +11:00
tokeniser (const char *_range, char _separator):
m_range (_range),
m_separator (_separator)
2017-09-15 15:22:29 +10:00
{ ; }
2019-02-20 15:51:54 +11:00
2017-09-15 15:22:29 +10:00
struct iterator : public std::iterator<
std::forward_iterator_tag,
range_type,
std::size_t
> {
2016-03-17 18:13:19 +11:00
public:
iterator operator++ (int)
{
iterator res(*this);
++*this;
return res;
}
iterator& operator++ (void)&
{
auto newend = m_range.cend ();
if (newend != m_end) {
CHECK_EQ (*m_range.cend (), m_separator);
newend++;
}
m_range = {
newend,
std::find (newend, m_end, m_separator)
};
return *this;
}
iterator operator+ (int count)
{
return std::next (*this, count);
}
2019-02-20 15:51:54 +11:00
auto const& operator* (void) const& { return m_range; }
auto const* operator-> (void) const& { return &m_range; }
bool operator== (const iterator &rhs) const
{
return m_range == rhs.m_range && m_separator == rhs.m_separator;
}
bool operator!= (const iterator &rhs) const
{
return !(*this == rhs);
}
2016-03-17 18:13:19 +11:00
private:
iterator (range_type _range, value_type _separator):
m_separator (_separator),
2019-02-20 15:51:54 +11:00
m_range {
_range.cbegin (),
std::find (_range.cbegin (), _range.cend (), _separator)
},
m_end (_range.cend ())
{ ; }
2016-03-18 11:08:12 +11:00
value_type m_separator;
2016-03-17 18:13:19 +11:00
range_type m_range;
2016-03-18 11:08:12 +11:00
Iterator m_end;
friend tokeniser;
2016-03-17 18:13:19 +11:00
};
2019-02-20 15:51:54 +11:00
iterator cbegin (void) const { return { m_range, m_separator }; }
iterator begin (void) const { return { m_range, m_separator }; }
2016-03-17 18:13:19 +11:00
iterator cend (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; }
iterator end (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; }
2016-03-17 18:13:19 +11:00
2018-07-18 15:19:35 +10:00
std::size_t
size (void) const
{
return std::count (
m_range.begin (),
m_range.end (),
m_separator
) + 1u;
}
2019-02-20 14:44:33 +11:00
bool contains (std::string_view key) const noexcept
{
2019-02-20 15:51:54 +11:00
for (auto const &i: *this)
if (equal (i, key))
return true;
return false;
2019-02-20 14:44:33 +11:00
}
2016-03-17 18:13:19 +11:00
private:
2016-03-18 11:08:12 +11:00
const range_type m_range;
2016-03-17 18:13:19 +11:00
const value_type m_separator;
};
2016-03-18 11:08:12 +11:00
2017-09-15 15:22:29 +10:00
/// Deduction guide: an lvalue container paired with a separator of the
/// container's `value_type` selects a tokeniser over the container's
/// `iterator` type.
template <typename ContainerT>
tokeniser (ContainerT&, typename ContainerT::value_type) -> tokeniser<typename ContainerT::iterator>;
/// Deduction guide: a C string paired with a `char` separator selects a
/// tokeniser over `const char*`.
tokeniser (const char*,char) -> tokeniser<const char*>;
2017-09-15 15:22:29 +10:00
///////////////////////////////////////////////////////////////////////////
template <typename CharT, std::size_t LengthV>
auto
make_tokeniser (CharT (&data)[LengthV], CharT separator)
{
return tokeniser { std::begin (data), std::end (data), separator };
}
2016-03-17 18:13:19 +11:00
2011-08-12 00:25:59 +10:00
///////////////////////////////////////////////////////////////////////////
/// Locate `cursor` within `data` as a zero-based line and column.
///
/// Lines are delimited by '\n'. The result is an unnamed struct with the
/// members `line` and `column`, both of the iterator's difference type.
///
/// A cursor that sits directly on a newline is attributed to the line that
/// follows it, and so reports a column of -1. Such a position always has a
/// line of at least 1, hence {0,-1} is never returned.
///
/// The iterator must support ordering comparison and subtraction.
template <typename IteratorT>
auto
character_position (
    cruft::view<IteratorT> data,
    IteratorT const cursor
) {
    using difference_type = typename std::iterator_traits<IteratorT>::difference_type;

    difference_type lines_seen = 0;
    auto line_start = data.begin ();

    // Step over each newline that lies at or before the cursor, remembering
    // where the line following it starts.
    for (;;) {
        auto const newline = std::find (line_start, data.end (), '\n');
        if (newline == data.end ())
            break;
        if (newline > cursor)
            break;

        line_start = newline + 1;
        ++lines_seen;
    }

    struct {
        difference_type line;
        difference_type column;
    } position { lines_seen, cursor - line_start };

    return position;
}
2019-05-01 12:38:55 +10:00
///////////////////////////////////////////////////////////////////////////
/// A comparator for NUL terminated strings that orders them by their
/// contents rather than by pointer value.
///
/// The comparison is performed through `std::char_traits` so that it works
/// for any standard character type (eg, `wchar_t`), not only `char`. For
/// `char` the ordering is the same as that of `strcmp`.
///
/// TODO: handle string and string_view objects
struct string_less {
    /// Returns true if the string at `a` orders strictly before the string
    /// at `b`. Both pointers must refer to NUL terminated strings.
    template <
        typename CharT,
        typename = std::void_t<typename std::char_traits<CharT>::char_type>
    >
    bool operator() (CharT const *a, CharT const *b) const noexcept
    {
        using traits = std::char_traits<CharT>;

        // Walk the common prefix. We stop at the first mismatch, or at the
        // end of `a`; a NUL in `b` alone registers as a mismatch.
        while (!traits::eq (*a, CharT {}) && traits::eq (*a, *b)) {
            ++a;
            ++b;
        }

        // Either the characters differ, or both strings ended together. A
        // terminator orders before every other character, so a proper
        // prefix compares less than the longer string.
        return traits::lt (*a, *b);
    }
};
}
2019-05-02 11:03:22 +10:00
namespace cruft::string::compare {
///////////////////////////////////////////////////////////////////////////
/// An equality comparator that tests two strings after applying a
/// per-character transform to each of them.
///
/// Neither string is modified; the transform is applied to each character
/// as it is compared. Both call operators are const so that a const
/// comparator object can be used with either form of string.
///
/// \tparam TransformV A character transform function
template <char (*TransformV)(char) noexcept>
struct transform {
    /// Compare two views. They are equal if they have the same size and
    /// every pair of corresponding characters is equal after the transform.
    template <
        typename CharT,
        typename = std::void_t<
            typename std::char_traits<CharT>::char_type
        >
    >
    bool operator() (cruft::view<CharT const *> a, cruft::view<CharT const*> b) const
    {
        if (a.size () != b.size ())
            return false;

        // The sizes match, so reaching the end of `a` also ends `b`.
        for (auto i = a.begin (), j = b.begin (); i != a.end (); ++i, ++j)
            if (TransformV (*i) != TransformV (*j))
                return false;

        return true;
    }

    /// Compare two NUL terminated strings. They are equal if they have the
    /// same length and every pair of corresponding characters is equal
    /// after the transform.
    template <
        typename CharT,
        typename = std::void_t<
            typename std::char_traits<CharT>::char_type
        >
    >
    bool operator() (CharT const *a, CharT const *b) const noexcept
    {
        auto const *i = a;
        auto const *j = b;

        for ( ; *i && *j; ++i, ++j)
            if (TransformV (*i) != TransformV (*j))
                return false;

        // Ensure we've reached the ends of both strings
        return !*i && !*j;
    }
};
/// Comparator that applies `ascii::try_lower` to each character before
/// testing equality.
/// NOTE(review): presumably this makes the comparison insensitive to ASCII
/// case -- confirm against ascii.hpp.
using lower = transform<ascii::try_lower>;
/// Comparator that applies `ascii::try_upper` to each character before
/// testing equality.
using upper = transform<ascii::try_upper>;
}