libcruft-util/cruft/util/string.hpp

422 lines
11 KiB
C++

/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 2011-2018 Danny Robson <danny@nerdcruft.net>
*/
#pragma once
#include "ascii.hpp"
#include "debug/assert.hpp"
#include "view.hpp"
#include <iterator>
#include <string>
#include <string_view>
#include <type_traits>
namespace cruft {
/// Convert a wide-character string to a `std::string`.
///
/// Declaration only; the definition is not part of this header. Judging by
/// the name the result is presumably UTF-8 encoded, and the pointer is
/// presumably expected to be NUL-terminated -- TODO confirm against the
/// implementation.
std::string to_utf8 (const wchar_t*);
/// Overload of `to_utf8` that takes a `std::wstring` by const reference.
std::string to_utf8 (const std::wstring&);
/// Replace every character of an owned string with the result of calling
/// `func` on it, then hand that same string back to the caller.
///
/// The characters are overwritten in place and the string is moved into the
/// return value.
template <typename FunctionT>
std::string
transform (std::string &&val, FunctionT &&func)
{
    // Work on a private copy of the callable (copied or moved, matching how
    // it was passed) so the caller's object is never invoked directly.
    std::decay_t<FunctionT> op (std::forward<FunctionT> (func));

    for (auto &ch: val)
        ch = op (ch);

    // `val` is a named rvalue reference, so it must be moved explicitly.
    return std::move (val);
}
/// Build a new string whose characters are the result of calling `func` on
/// each character of `val`, in order.
///
/// The viewed characters are left untouched.
template <typename FunctionT>
std::string
transform (std::string_view val, FunctionT &&func)
{
    // Work on a private copy of the callable (copied or moved, matching how
    // it was passed) so the caller's object is never invoked directly.
    std::decay_t<FunctionT> op (std::forward<FunctionT> (func));

    std::string out;
    out.reserve (val.size ());

    for (auto const &ch: val)
        out.push_back (op (ch));

    return out;
}
/// Return an upper-cased copy of the characters in `val`.
///
/// Each character is converted to `unsigned char` before it is handed to
/// `::toupper`. The C library function has undefined behaviour for negative
/// arguments other than EOF, which is what a plain `char` holding a
/// non-ASCII byte becomes wherever `char` is signed.
inline std::string
to_upper (std::string_view const &val)
{
    return transform (val, [] (char c) {
        return static_cast<char> (::toupper (static_cast<unsigned char> (c)));
    });
}
template <std::size_t N>
inline std::string
to_upper (char const (&val)[N])
{
return to_upper (std::string_view (val));
}
/// Convert the provided string to all upper case.
///
/// The string is modified in place and returned by move.
///
/// Each character is converted to `unsigned char` before it is handed to
/// `::toupper`. The C library function has undefined behaviour for negative
/// arguments other than EOF, which is what a plain `char` holding a
/// non-ASCII byte becomes wherever `char` is signed.
inline std::string
to_upper (std::string &&val)
{
    return transform (std::move (val), [] (char c) {
        return static_cast<char> (::toupper (static_cast<unsigned char> (c)));
    });
}
/// Convert the provided string to all upper case.
///
/// The argument is left untouched; a converted copy is returned.
///
/// Each character is converted to `unsigned char` before it is handed to
/// `::toupper`. The C library function has undefined behaviour for negative
/// arguments other than EOF, which is what a plain `char` holding a
/// non-ASCII byte becomes wherever `char` is signed.
inline std::string
to_upper (std::string const &val)
{
    return transform (val, [] (char c) {
        return static_cast<char> (::toupper (static_cast<unsigned char> (c)));
    });
}
/// Convert the provided string to all lower case.
///
/// The string is modified in place and returned by move.
///
/// Each character is converted to `unsigned char` before it is handed to
/// `::tolower`. The C library function has undefined behaviour for negative
/// arguments other than EOF, which is what a plain `char` holding a
/// non-ASCII byte becomes wherever `char` is signed.
inline std::string
to_lower (std::string &&val)
{
    return transform (std::move (val), [] (char c) {
        return static_cast<char> (::tolower (static_cast<unsigned char> (c)));
    });
}
/// Convert the provided string to all lower case.
///
/// The argument is left untouched; a converted copy is returned.
///
/// Each character is converted to `unsigned char` before it is handed to
/// `::tolower`. The C library function has undefined behaviour for negative
/// arguments other than EOF, which is what a plain `char` holding a
/// non-ASCII byte becomes wherever `char` is signed.
inline std::string
to_lower (std::string const &val)
{
    return transform (val, [] (char c) {
        return static_cast<char> (::tolower (static_cast<unsigned char> (c)));
    });
}
/// Drop leading characters for which `ascii::is_space` holds.
///
/// Nothing is copied: the result views the same storage as `str`, starting
/// at the first character that is kept.
inline std::string_view
lstrip (std::string_view str)
{
    auto const first_kept = std::find_if_not (str.begin (), str.end (), ascii::is_space);
    str.remove_prefix (static_cast<std::string_view::size_type> (first_kept - str.begin ()));
    return str;
}
/// Drop trailing characters for which `ascii::is_space` holds.
///
/// Nothing is copied: the result views the same storage as `str`, ending
/// just after the last character that is kept.
inline std::string_view
rstrip (std::string_view str)
{
    // Scan backwards; the distance walked is the number of trailing
    // characters to discard.
    auto const last_kept = std::find_if_not (str.rbegin (), str.rend (), ascii::is_space);
    str.remove_suffix (static_cast<std::string_view::size_type> (last_kept - str.rbegin ()));
    return str;
}
/// Remove whitespace from both ends of a view: the tail is trimmed with
/// `rstrip` first, then the head with `lstrip`.
inline std::string_view
strip (std::string_view str)
{
    std::string_view const without_tail = rstrip (str);
    return lstrip (without_tail);
}
}
/// Global-scope declaration; the definition is not part of this header.
///
/// Judging by the name and parameters this presumably reports whether `str`
/// starts with `prefix` -- TODO confirm against the implementation.
///
/// NOTE(review): `restrict` is not a standard C++ keyword, so it is
/// presumably provided as a macro elsewhere in the project -- verify.
bool
strbegins(const char *restrict str,
const char *restrict prefix);
namespace cruft {
/// Provides an iterator interface over a string, breaking at each
/// occurrence of a specific character.
///
/// It is up to the constructor to ensure the lifetime of the string.
/// This class simply provides an interface for iteration and doesn't
/// concern itself with lifetimes.
///
/// Adjacent separators, and a separator at either end of the range, yield
/// empty tokens.
///
/// NOTE: for an empty range `begin ()` equals `end ()`, so iteration yields
/// no tokens, whereas `size ()` reports one.
template <std::forward_iterator Iterator>
struct tokeniser {
public:
    /// A single token: a view over part of the tokenised range.
    using value_type = view<Iterator>;
    /// The type of one element of the range, and of the separator.
    using element_type = typename std::iterator_traits<Iterator>::value_type;

    /// Tokenise `_range`, splitting at each element equal to `_separator`.
    tokeniser (cruft::view<Iterator,Iterator> _range, element_type _separator):
        m_range (_range),
        m_separator (_separator)
    { ; }

    /// Tokenise a C string; the view is built directly from the pointer.
    tokeniser (const char *_range, char _separator):
        m_range (_range),
        m_separator (_separator)
    { ; }

    /// Forward iterator whose value is the current token.
    struct iterator {
    public:
        using iterator_category = std::forward_iterator_tag;
        using value_type = tokeniser::value_type;
        using difference_type = std::iter_difference_t<Iterator>;
        using pointer = value_type*;
        using reference = value_type&;

        /// Post-increment: advance, returning a copy of the previous state.
        iterator
        operator++ (int)
        {
            iterator res(*this);
            ++*this;
            return res;
        }

        /// Pre-increment: step to the token after the next separator, or
        /// become equal to the end iterator once the range is exhausted.
        iterator& operator++ (void)&
        {
            // The current token already reaches the end of the range, so
            // there is no separator left to step over.
            if (m_data.end () == m_end) {
                m_data = { m_data.end (), m_data.end () };
                m_base = m_end;
                return *this;
            }

            // The current token stops at a separator. Remember where, then
            // resume one element past it. std::next is used rather than
            // `+ 1` so that plain forward iterators are supported.
            m_base = m_data.end ();
            auto const token_begin = std::next (m_data.end ());
            m_data = { token_begin, std::find (token_begin, m_end, m_separator) };
            return *this;
        }

        /// Return a copy of this iterator advanced by `count` tokens.
        iterator operator+ (int count) const
        {
            return std::next (*this, count);
        }

        /// The current token.
        auto const&
        operator* (void) const&
        {
            return m_data;
        }

        /// Member access on the current token.
        auto
        operator-> (void) const&
        {
            return &m_data;
        }

        /// Iterators compare equal when their base positions match; only
        /// `m_base` takes part in the comparison. Both sides are checked to
        /// use the same separator.
        bool operator== (const iterator &rhs) const
        {
            CHECK_EQ (m_separator, rhs.m_separator);
            return m_base == rhs.m_base;
        }

        bool operator!= (iterator const &rhs) const
        {
            return !(*this == rhs);
        }

        /// Start iteration at the first token of `_range`.
        iterator (value_type _range, element_type _separator)
            : m_separator (_separator)
            , m_base (_range.begin ())
            , m_data (_range.begin (), std::find (_range.begin (), _range.end (), _separator))
            , m_end (_range.end ())
        { ; }

    private:
        element_type m_separator;
        /// Start of the range for the first token; afterwards the position
        /// of the separator preceding the current token; `m_end` once the
        /// range is exhausted.
        Iterator m_base;
        /// The current token.
        view<Iterator> m_data;
        /// End of the range being tokenised.
        Iterator m_end;
    };

    iterator cbegin (void) const { return { m_range, m_separator }; }
    iterator begin  (void) const { return { m_range, m_separator }; }

    iterator cend (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; }
    iterator end  (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; }

    /// One more than the number of separators in the range.
    std::size_t
    size (void) const
    {
        return std::count (
            m_range.begin (),
            m_range.end (),
            m_separator
        ) + 1u;
    }

    /// Report whether any token compares equal to `key` under `equal`.
    bool contains (std::string_view key) const noexcept
    {
        for (auto const &i: *this)
            if (equal (i, key))
                return true;
        return false;
    }

private:
    const value_type m_range;
    const element_type m_separator;
};
/// Deduction guide for a container and a separator of the container's
/// `value_type`: the tokeniser is instantiated on the container's `iterator`
/// member type.
template <typename ContainerT>
tokeniser (ContainerT&, typename ContainerT::value_type) -> tokeniser<typename ContainerT::iterator>;
/// Deduction guide for a C string and a `char` separator: the tokeniser is
/// instantiated on `const char*`.
tokeniser (const char*,char) -> tokeniser<const char*>;
///////////////////////////////////////////////////////////////////////////
template <typename CharT, std::size_t LengthV>
auto
make_tokeniser (CharT (&data)[LengthV], CharT separator)
{
return tokeniser { std::begin (data), std::end (data), separator };
}
///////////////////////////////////////////////////////////////////////////
/// Locate an iterator within a view in terms of lines and columns.
///
/// The result is an unnamed struct with `line` and `column` members. Both
/// are counted from zero: `line` is the number of newlines passed before
/// reaching the cursor, and `column` is the distance from the start of that
/// line to the cursor.
///
/// The column number for a cursor on a newline is undefined. However it
/// should never be {0,-1}, so offsetting a pointer by the column index
/// should not underrun.
template <typename IteratorT>
auto
character_position (
    cruft::view<IteratorT> data,
    IteratorT const cursor
) {
    using difference_type = typename std::iterator_traits<IteratorT>::difference_type;

    difference_type newlines_passed = 0;
    auto line_start = data.begin ();

    for (;;) {
        auto const newline = std::find (line_start, data.end (), '\n');

        // No further newline: the cursor sits on the final line.
        if (newline == data.end ())
            break;

        // The next newline lies beyond the cursor, so `line_start` already
        // marks the cursor's line.
        if (newline > cursor)
            break;

        ++newlines_passed;
        line_start = newline + 1;
    }

    struct {
        difference_type line;
        difference_type column;
    } position { newlines_passed, cursor - line_start };

    return position;
}
///////////////////////////////////////////////////////////////////////////
/// A comparator for NUL-terminated strings that orders them by their
/// characters rather than by pointer value.
///
/// The comparison goes through `std::basic_string_view`, so it works for
/// every character type that has a `std::char_traits` specialisation and
/// not just for `char`. For `char` the ordering matches strcmp: characters
/// are compared as `unsigned char`, and a string that is a proper prefix of
/// another sorts first.
///
/// Both pointers must be non-null and NUL-terminated.
///
/// TODO: handle string and string_view objects
struct string_less {
    template <
        typename CharT,
        typename = std::void_t<typename std::char_traits<CharT>::char_type>
    >
    bool operator() (CharT const *a, CharT const *b) const noexcept
    {
        using view_type = std::basic_string_view<CharT>;
        return view_type (a) < view_type (b);
    }
};
}
namespace cruft::string {
    /// A strict-weak "less than" for strings that compares characters only
    /// after passing each one through `TransformV`.
    ///
    /// When one string is a proper prefix of the other (after the
    /// transform) the shorter one orders first.
    ///
    /// \tparam TransformV  A per-character transform function
    template <char (*TransformV)(char) noexcept>
    struct less {
        /// Compare two NUL-terminated strings.
        bool operator() (char const *a, char const *b) const noexcept
        {
            while (*a != '\0' && *b != '\0') {
                char const lhs = TransformV (*a);
                char const rhs = TransformV (*b);

                if (lhs != rhs)
                    return lhs < rhs;

                ++a;
                ++b;
            }

            // At least one string is exhausted; `a` is smaller only if it
            // ran out first.
            return *a == '\0' && *b != '\0';
        }


        /// Compare two strings by viewing them as string_views.
        bool operator() (std::string const &a, std::string const & b) const noexcept
        {
            return (*this) (std::string_view (a), std::string_view (b));
        }


        /// Compare two views character by character.
        bool operator() (std::string_view a, std::string_view b) const noexcept
        {
            std::string_view::size_type const shared =
                a.size () < b.size () ? a.size () : b.size ();

            for (std::string_view::size_type i = 0; i < shared; ++i) {
                char const lhs = TransformV (a[i]);
                char const rhs = TransformV (b[i]);

                if (lhs != rhs)
                    return lhs < rhs;
            }

            // Equal over the common prefix: the shorter view is smaller.
            return a.size () < b.size ();
        }


        /// Marks the comparator as transparent, so associative containers
        /// may compare keys against other string-like types directly.
        using is_transparent = void;


        /// Compare two arguments of differing types by converting both to
        /// `std::string_view`.
        template <typename A, typename B>
        requires (!std::is_same_v<A,B>)
        bool operator() (A &&a, B &&b) const noexcept
        {
            return (*this) (
                std::string_view (a),
                std::string_view (b)
            );
        }
    };

    /// Ordering applied after each character has gone through
    /// `ascii::try_lower`.
    using less_lower = less<ascii::try_lower>;
}
namespace cruft::string::equality {
    ///////////////////////////////////////////////////////////////////////////
    /// An equality comparator that compares two strings after running each
    /// character through a transform.
    ///
    /// Strings of different lengths never compare equal. Neither string is
    /// modified.
    ///
    /// \tparam TransformV  A character transform function
    template <char (*TransformV)(char) noexcept>
    struct transform {
        static bool operator() (
            std::string_view const &a,
            std::string_view const &b
        ) noexcept {
            if (a.size () != b.size ())
                return false;

            for (std::string_view::size_type idx = 0; idx < a.size (); ++idx) {
                if (TransformV (a[idx]) != TransformV (b[idx]))
                    return false;
            }

            return true;
        }
    };

    /// Equality after each character has gone through `ascii::try_lower`.
    using lower = transform<ascii::try_lower>;
    /// Equality after each character has gone through `ascii::try_upper`.
    using upper = transform<ascii::try_upper>;
}