libcruft-util/cruft/util/string.hpp

422 lines
11 KiB
C++
Raw Normal View History

2011-08-12 00:25:59 +10:00
/*
2018-08-04 15:14:06 +10:00
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
2011-08-12 00:25:59 +10:00
*
* Copyright 2011-2018 Danny Robson <danny@nerdcruft.net>
2011-08-12 00:25:59 +10:00
*/
#pragma once
2011-08-12 00:25:59 +10:00
2019-05-02 11:03:22 +10:00
#include "ascii.hpp"
#include "debug/assert.hpp"
#include "view.hpp"
2016-03-17 18:13:19 +11:00
2019-05-01 12:38:55 +10:00
#include <string>
#include <type_traits>
namespace cruft {
/// Declarations only; the definitions are not part of this header.
/// NOTE(review): going by the name these presumably convert a
/// wide-character string to a UTF-8 encoded std::string -- confirm against
/// the implementation.
std::string to_utf8 (const wchar_t*);
std::string to_utf8 (const std::wstring&);
2019-11-26 07:49:23 +11:00
2019-11-26 08:10:04 +11:00
/// Apply a functor to each character of a string, in place, and return
/// the result.
///
/// The characters of `val` are overwritten directly, so no second buffer
/// is built; the storage of `val` is then moved into the return value.
///
/// \param val   The string to transform. Taken by rvalue reference so it
///              can be modified and handed back without copying.
/// \param func  A unary callable invoked on each character; its result is
///              written back over that character.
/// \return The transformed string.
template <typename FunctionT>
std::string
transform (std::string &&val, FunctionT &&func)
{
    std::transform (
        std::begin (val),
        std::end   (val),
        std::begin (val),
        std::forward<FunctionT> (func)
    );

    // `val` is a named rvalue reference, which is an lvalue expression, so
    // the move is spelled out to avoid copying into the return value.
    return std::move (val);
}
2019-11-26 08:10:04 +11:00
/// Apply a functor to each character of a string view and return the
/// results as a newly built string.
///
/// The viewed characters are not modified.
///
/// \param val   The characters to transform.
/// \param func  A unary callable invoked on each character; each result is
///              appended to the output string.
/// \return A string holding one transformed character per input character.
template <typename FunctionT>
std::string
transform (std::string_view val, FunctionT &&func)
{
    std::string res;

    // The output has exactly one character per input character, so size
    // the buffer up front rather than growing it during the append loop.
    res.reserve (val.size ());

    std::transform (
        std::begin (val),
        std::end   (val),
        std::back_inserter (res),
        std::forward<FunctionT> (func)
    );

    return res;
}
/// Convert the viewed string to all upper case, returning a new string.
///
/// Each character is mapped with `::toupper`.
inline std::string
to_upper (std::string_view const &val)
{
    // ::toupper has undefined behaviour when given a negative value other
    // than EOF, which plain `char` produces for bytes >= 0x80 wherever
    // char is signed. Route every character through unsigned char first.
    return transform (val, [] (char c) {
        return static_cast<char> (::toupper (static_cast<unsigned char> (c)));
    });
}
template <std::size_t N>
inline std::string
to_upper (char const (&val)[N])
{
return to_upper (std::string_view (val));
}
2019-11-26 08:10:04 +11:00
/// Convert the provided string to all upper case.
///
/// Each character is mapped with `::toupper`. The argument is consumed and
/// its contents are returned in converted form.
inline std::string
to_upper (std::string &&val)
{
    // ::toupper has undefined behaviour when given a negative value other
    // than EOF, which plain `char` produces for bytes >= 0x80 wherever
    // char is signed. Route every character through unsigned char first.
    return transform (std::move (val), [] (char c) {
        return static_cast<char> (::toupper (static_cast<unsigned char> (c)));
    });
}
/// Convert the provided string to all upper case, returning a new string.
///
/// Each character is mapped with `::toupper`. The argument is not
/// modified.
inline std::string
to_upper (std::string const &val)
{
    // ::toupper has undefined behaviour when given a negative value other
    // than EOF, which plain `char` produces for bytes >= 0x80 wherever
    // char is signed. Route every character through unsigned char first.
    return transform (val, [] (char c) {
        return static_cast<char> (::toupper (static_cast<unsigned char> (c)));
    });
}
/// Convert the provided string to all lower case.
///
/// Each character is mapped with `::tolower`. The argument is consumed and
/// its contents are returned in converted form.
inline std::string
to_lower (std::string &&val)
{
    // ::tolower has undefined behaviour when given a negative value other
    // than EOF, which plain `char` produces for bytes >= 0x80 wherever
    // char is signed. Route every character through unsigned char first.
    return transform (std::move (val), [] (char c) {
        return static_cast<char> (::tolower (static_cast<unsigned char> (c)));
    });
}
/// Convert the provided string to all lower case, returning a new string.
///
/// Each character is mapped with `::tolower`. The argument is not
/// modified.
inline std::string
to_lower (std::string const &val)
{
    // ::tolower has undefined behaviour when given a negative value other
    // than EOF, which plain `char` produces for bytes >= 0x80 wherever
    // char is signed. Route every character through unsigned char first.
    return transform (val, [] (char c) {
        return static_cast<char> (::tolower (static_cast<unsigned char> (c)));
    });
}
2022-05-02 14:26:06 +10:00
inline std::string_view
lstrip (std::string_view str)
{
auto const pos = std::find_if_not (std::begin (str), std::end (str), ascii::is_space);
return { pos, std::end (str) };
}
inline std::string_view
rstrip (std::string_view str)
{
auto const pos = std::find_if_not (std::rbegin (str), std::rend (str), ascii::is_space);
return { str.begin (), pos.base () };
}
/// Drop both leading and trailing whitespace from a string view.
///
/// The tail is trimmed first using `rstrip`, then the head using `lstrip`.
inline std::string_view
strip (std::string_view str)
{
    auto const without_tail = rstrip (str);
    return lstrip (without_tail);
}
}
2011-08-12 00:25:59 +10:00
/// Declaration only; the definition is not part of this header.
/// NOTE(review): going by the name this presumably reports whether `str`
/// begins with `prefix` -- confirm against the implementation.
/// NOTE(review): `restrict` is not a standard C++ keyword, so this
/// presumably relies on a macro or compiler extension provided elsewhere
/// -- TODO confirm.
bool
strbegins(const char *restrict str,
const char *restrict prefix);
2016-03-17 18:13:19 +11:00
namespace cruft {
2019-02-20 15:51:54 +11:00
/// Provides an iterator interface over a string, breaking at each
/// occurence of a specific character.
///
/// It is up to the constructor to ensure the lifetime of the string.
/// This class simply provides an interface for iteration and doesn't
/// concern itself with lifetimes.
template <std::forward_iterator Iterator>
2016-03-17 18:13:19 +11:00
struct tokeniser {
public:
using value_type = view<Iterator>;
using element_type = typename std::iterator_traits<Iterator>::value_type;
2016-03-17 18:13:19 +11:00
tokeniser (cruft::view<Iterator,Iterator> _range, element_type _separator):
m_range (_range),
m_separator (_separator)
{ ; }
2016-03-17 18:13:19 +11:00
tokeniser (const char *_range, char _separator):
m_range (_range),
m_separator (_separator)
2017-09-15 15:22:29 +10:00
{ ; }
2021-11-18 12:41:23 +11:00
struct iterator {
2016-03-17 18:13:19 +11:00
public:
2021-11-18 12:41:23 +11:00
using iterator_category = std::forward_iterator_tag;
using value_type = tokeniser::value_type;
using difference_type = std::iter_difference_t<Iterator>;
2021-11-18 12:41:23 +11:00
using pointer = value_type*;
using reference = value_type&;
iterator
operator++ (int)
{
iterator res(*this);
++*this;
return res;
}
iterator& operator++ (void)&
{
if (m_data.end () == m_end) {
m_data = { m_data.end (), m_data.end () };
m_base = m_end;
return *this;
}
m_base = m_data.end ();
m_data = { m_data.end () + 1, std::find (m_data.end () + 1, m_end, m_separator) };
return *this;
}
iterator operator+ (int count)
{
return std::next (*this, count);
}
auto const&
operator* (void) const&
{
return m_data;
}
auto
operator-> (void) const&
{
return &m_data;
}
bool operator== (const iterator &rhs) const
{
CHECK_EQ (m_separator, rhs.m_separator);
return m_base == rhs.m_base;
}
bool operator!= (iterator const &rhs) const
{
return !(*this == rhs);
}
2016-03-17 18:13:19 +11:00
iterator (value_type _range, element_type _separator)
: m_separator (_separator)
, m_base (_range.begin ())
, m_data (_range.begin (), std::find (_range.begin (), _range.end (), _separator))
, m_end (_range.end ())
{ ; }
2016-03-18 11:08:12 +11:00
private:
element_type m_separator;
2016-03-18 11:08:12 +11:00
Iterator m_base;
view<Iterator> m_data;
Iterator m_end;
2016-03-17 18:13:19 +11:00
};
2019-02-20 15:51:54 +11:00
iterator cbegin (void) const { return { m_range, m_separator }; }
iterator begin (void) const { return { m_range, m_separator }; }
2016-03-17 18:13:19 +11:00
iterator cend (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; }
iterator end (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; }
2016-03-17 18:13:19 +11:00
2018-07-18 15:19:35 +10:00
std::size_t
size (void) const
{
return std::count (
m_range.begin (),
m_range.end (),
m_separator
) + 1u;
}
2019-02-20 14:44:33 +11:00
bool contains (std::string_view key) const noexcept
{
2019-02-20 15:51:54 +11:00
for (auto const &i: *this)
if (equal (i, key))
return true;
return false;
2019-02-20 14:44:33 +11:00
}
2016-03-17 18:13:19 +11:00
private:
const value_type m_range;
const element_type m_separator;
2016-03-17 18:13:19 +11:00
};
2016-03-18 11:08:12 +11:00
2017-09-15 15:22:29 +10:00
/// Deduction guide: a tokeniser constructed from a container lvalue and a
/// separator of the container's `value_type` is deduced as a tokeniser
/// over the container's `iterator` type.
template <typename ContainerT>
tokeniser (ContainerT&, typename ContainerT::value_type) -> tokeniser<typename ContainerT::iterator>;
/// Deduction guide: a C string and a `char` separator are deduced as a
/// tokeniser over `const char*`.
tokeniser (const char*,char) -> tokeniser<const char*>;
2017-09-15 15:22:29 +10:00
///////////////////////////////////////////////////////////////////////////
template <typename CharT, std::size_t LengthV>
auto
make_tokeniser (CharT (&data)[LengthV], CharT separator)
{
return tokeniser { std::begin (data), std::end (data), separator };
}
2016-03-17 18:13:19 +11:00
2011-08-12 00:25:59 +10:00
///////////////////////////////////////////////////////////////////////////
/// Work out which line and column an iterator refers to within a view.
///
/// The result is an anonymous struct with `line` and `column` members.
/// `line` counts the newlines consumed before the cursor's line, and
/// `column` is the distance from the start of that line to the cursor.
///
/// The column reported for a newline character itself is undefined.
/// However the result should never be {0,-1}, so offsetting a pointer by
/// the column index should not underrun the data.
template <typename IteratorT>
auto
character_position (
    cruft::view<IteratorT> data,
    IteratorT const cursor
) {
    using difference_type = typename std::iterator_traits<IteratorT>::difference_type;

    difference_type lines_seen = 0;
    auto line_start = data.begin ();

    for (;;) {
        auto const newline = std::find (line_start, data.end (), '\n');

        // No further newline: the cursor is on the final line.
        if (newline == data.end ())
            break;

        // The next newline lies beyond the cursor: this is its line.
        if (newline > cursor)
            break;

        line_start = newline + 1;
        ++lines_seen;
    }

    struct {
        difference_type line;
        difference_type column;
    } result = {
        .line   = lines_seen,
        .column = cursor - line_start,
    };

    return result;
}
2019-05-01 12:38:55 +10:00
///////////////////////////////////////////////////////////////////////////
/// A comparator for NUL-terminated strings that orders by character
/// content rather than by pointer value.
///
/// Works for any character type that has a `std::char_traits`
/// specialisation (char, wchar_t, char16_t, ...). For `char` the ordering
/// matches that of `strcmp`.
///
/// Both pointers must refer to NUL-terminated strings.
///
/// TODO: handle string and string_view objects
struct string_less {
    template <
        typename CharT,
        typename = std::void_t<typename std::char_traits<CharT>::char_type>
    >
    bool operator() (CharT const *a, CharT const *b) const noexcept
    {
        // basic_string_view compares through char_traits<CharT>, so this
        // compiles for every character type the template admits, whereas
        // strcmp is only usable with plain char.
        using view_type = std::basic_string_view<CharT>;
        return view_type (a) < view_type (b);
    }
};
}
2019-05-02 11:03:22 +10:00
namespace cruft::string {
/// A "less than" comparator for strings that compares characters only
/// after passing each one through `TransformV`.
///
/// Strings are compared element by element. When one string is a proper
/// prefix of the other (after transformation) the shorter one orders
/// first. Neither string is modified.
///
/// \tparam TransformV  A character transform function.
template <char (*TransformV)(char) noexcept>
struct less {
    /// Marks the comparator as accepting mixed argument types.
    using is_transparent = void;

    /// Compare two NUL-terminated strings.
    bool operator() (char const *a, char const *b) const noexcept
    {
        while (*a != '\0' && *b != '\0') {
            char const lhs = TransformV (*a);
            char const rhs = TransformV (*b);

            // The first differing pair decides the ordering.
            if (lhs != rhs)
                return lhs < rhs;

            ++a;
            ++b;
        }

        // Equal over the shared length: `a` orders first only when it
        // ended and `b` did not.
        return *a == '\0' && *b != '\0';
    }

    /// Compare two std::string objects via their string views.
    bool operator() (std::string const &a, std::string const & b) const noexcept
    {
        std::string_view const lhs (a);
        std::string_view const rhs (b);
        return (*this) (lhs, rhs);
    }

    /// Compare two string views.
    bool operator() (std::string_view a, std::string_view b) const noexcept
    {
        std::size_t const shared = a.size () < b.size () ? a.size () : b.size ();

        for (std::size_t i = 0; i != shared; ++i) {
            char const lhs = TransformV (a[i]);
            char const rhs = TransformV (b[i]);

            // The first differing pair decides the ordering.
            if (lhs != rhs)
                return lhs < rhs;
        }

        // Equal over the shared length: the shorter string orders first.
        return a.size () < b.size ();
    }

    /// Compare two arguments of differing types by converting both to
    /// string views.
    template <typename A, typename B>
    requires (!std::is_same_v<A,B>)
    bool operator() (A &&a, B &&b) const noexcept
    {
        std::string_view const lhs (a);
        std::string_view const rhs (b);
        return (*this) (lhs, rhs);
    }
};
using less_lower = less<ascii::try_lower>;
}
2021-11-22 11:42:19 +11:00
namespace cruft::string::equality {
2019-05-02 11:03:22 +10:00
///////////////////////////////////////////////////////////////////////////
/// An equality comparator that tests two strings after a per-character
/// transform has been applied to each.
///
/// Strings of differing length are never equal. Neither string is
/// modified.
///
/// \tparam TransformV A character transform function
template <char (*TransformV)(char) noexcept>
struct transform {
    /// \return true when `a` and `b` have the same length and every pair
    ///         of corresponding characters is equal after transformation.
    static bool operator() (
        std::string_view const &a,
        std::string_view const &b
    ) noexcept {
        // Checking the lengths first lets the loop below walk `b` in
        // lock-step with `a` without a separate bounds test.
        if (a.size () != b.size ())
            return false;

        for (auto i = a.begin (), j = b.begin (); i != a.end (); ++i, ++j)
            if (TransformV (*i) != TransformV (*j))
                return false;

        return true;
    }
};
using lower = transform<ascii::try_lower>;
using upper = transform<ascii::try_upper>;
}