libcruft-util/string.hpp

262 lines
7.5 KiB
C++

/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 2011-2018 Danny Robson <danny@nerdcruft.net>
*/
#pragma once
#include "ascii.hpp"
#include "debug/assert.hpp"
#include "view.hpp"
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <iterator>
#include <string>
#include <string_view>
#include <type_traits>
namespace cruft {
/// Produce a std::string from a pointer to wide characters.
///
/// Only the declaration is visible here. Presumably the argument is a
/// NUL-terminated wide string and the result is its UTF-8 encoding --
/// confirm against the definition.
std::string to_utf8 (const wchar_t*);
/// Overload of to_utf8 that accepts a std::wstring.
///
/// Presumably yields the UTF-8 encoding of the argument -- confirm against
/// the definition.
std::string to_utf8 (const std::wstring&);
}
/// Declaration only; the definition is not part of this header.
///
/// Presumably reports whether `str` starts with `prefix` -- confirm against
/// the definition.
///
/// NOTE(review): `restrict` is not a standard C++ keyword; presumably it is
/// supplied as a macro by one of the project headers -- confirm.
/// NOTE(review): this is declared at global scope, between the two
/// `namespace cruft` blocks, rather than inside the namespace.
bool
strbegins(const char *restrict str,
const char *restrict prefix);
namespace cruft {
/// Provides an iterator interface over a string, breaking at each
/// occurrence of a specific character.
///
/// It is up to the caller to ensure the lifetime of the string. This class
/// simply provides an interface for iteration and doesn't concern itself
/// with lifetimes.
///
/// \tparam Iterator  Iterator type of the underlying character range.
template <typename Iterator>
struct tokeniser {
public:
    /// A single token: a view over a sub-range of the source.
    using value_type = view<Iterator>;
    /// The character type; also the type of the separator.
    using element_type = typename std::iterator_traits<Iterator>::value_type;

    /// Tokenise `_range`, splitting on each `_separator`.
    tokeniser (cruft::view<Iterator,Iterator> _range, element_type _separator):
        m_range (_range),
        m_separator (_separator)
    { ; }

    /// Tokenise a C string; the view is constructed directly from the
    /// supplied pointer.
    tokeniser (const char *_range, char _separator):
        m_range (_range),
        m_separator (_separator)
    { ; }

    /// Forward iterator that yields one token (a view) per step.
    struct iterator {
    public:
        // These typedefs replace the std::iterator base class, which is
        // deprecated as of C++17. The types are exactly those the base
        // class used to provide (including the unsigned difference_type).
        using iterator_category = std::forward_iterator_tag;
        using value_type = typename tokeniser::value_type;
        using difference_type = std::size_t;
        using pointer = value_type*;
        using reference = value_type&;

        /// Post-increment: advance, returning the prior position.
        iterator operator++ (int)
        {
            iterator res(*this);
            ++*this;
            return res;
        }

        /// Pre-increment: move to the token following the current one.
        iterator& operator++ (void)&
        {
            auto newend = m_range.cend ();

            // If we're not at the end of the source then the current token
            // must have been terminated by a separator; step over it.
            if (newend != m_end) {
                CHECK_EQ (*m_range.cend (), m_separator);
                ++newend;
            }

            // The next token runs up to the next separator, or to the end
            // of the source if there are no more separators.
            m_range = {
                newend,
                std::find (newend, m_end, m_separator)
            };

            return *this;
        }

        /// Return a copy of this iterator advanced by `count` tokens.
        iterator operator+ (int count) const
        {
            return std::next (*this, count);
        }

        auto const& operator* (void) const& { return m_range; }
        auto const* operator-> (void) const& { return &m_range; }

        /// Iterators are equal when both the current token and the
        /// separator compare equal.
        bool operator== (const iterator &rhs) const
        {
            return m_range == rhs.m_range && m_separator == rhs.m_separator;
        }

        bool operator!= (const iterator &rhs) const
        {
            return !(*this == rhs);
        }

    private:
        /// Position the iterator on the first token of `_range`.
        iterator (value_type _range, element_type _separator):
            m_separator (_separator),
            m_range {
                _range.cbegin (),
                std::find (_range.cbegin (), _range.cend (), _separator)
            },
            m_end (_range.cend ())
        { ; }

        element_type m_separator;
        value_type m_range;     // the current token
        Iterator m_end;         // the end of the entire source range

        friend tokeniser;
    };

    iterator cbegin (void) const { return { m_range, m_separator }; }
    iterator begin  (void) const { return { m_range, m_separator }; }

    // The end sentinel is an empty token located at the end of the source.
    iterator cend (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; }
    iterator end  (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; }

    /// Returns the number of separators in the range, plus one.
    ///
    /// NOTE(review): iteration finishes once the current token is the empty
    /// range at the end of the source, so an empty source or a trailing
    /// separator appears to yield one fewer token during iteration than is
    /// reported here -- confirm against view's operator==.
    std::size_t
    size (void) const
    {
        return std::count (
            m_range.begin (),
            m_range.end (),
            m_separator
        ) + 1u;
    }

    /// Returns true if any token satisfies `equal (token, key)`.
    bool contains (std::string_view key) const noexcept
    {
        for (auto const &i: *this)
            if (equal (i, key))
                return true;
        return false;
    }

private:
    const value_type m_range;
    const element_type m_separator;
};
/// Deduction guide: constructing from a container and a separator selects
/// the tokeniser specialised on that container's `iterator` type.
///
/// NOTE(review): for a const-qualified container this still names the
/// mutable `iterator` rather than `const_iterator` -- confirm this is the
/// intended behaviour.
template <typename ContainerT>
tokeniser (ContainerT&, typename ContainerT::value_type) -> tokeniser<typename ContainerT::iterator>;
/// Deduction guide: a C string and a `char` separator select
/// `tokeniser<const char*>`.
tokeniser (const char*,char) -> tokeniser<const char*>;
///////////////////////////////////////////////////////////////////////////
template <typename CharT, std::size_t LengthV>
auto
make_tokeniser (CharT (&data)[LengthV], CharT separator)
{
return tokeniser { std::begin (data), std::end (data), separator };
}
///////////////////////////////////////////////////////////////////////////
/// Find the line and column at which an iterator sits within a view.
///
/// The result is an unnamed struct with the members `line` and `column`.
/// Both are zero-based: `line` is the number of newlines consumed before
/// the line holding `cursor`, and `column` is the distance from the start
/// of that line to `cursor`.
///
/// The column reported for a cursor that sits on a newline is not part of
/// the contract. The newline is counted as consumed, so the line is always
/// at least 1 in that case and {0,-1} is never returned.
///
/// \param data    The text to search.
/// \param cursor  The position to locate; expected to lie within `data`.
template <typename IteratorT>
auto
character_position (
    cruft::view<IteratorT> data,
    IteratorT const cursor
) {
    using difference_type = typename std::iterator_traits<IteratorT>::difference_type;

    auto const last = data.end ();
    auto line_start = data.begin ();
    difference_type lines_seen = 0;

    // Walk forward one line at a time until we reach the line that holds
    // the cursor, or until there are no further newlines.
    for (;;) {
        auto const newline = std::find (line_start, last, '\n');

        bool const on_final_line = newline == last || newline > cursor;
        if (on_final_line)
            break;

        line_start = newline + 1;
        ++lines_seen;
    }

    struct {
        difference_type line;
        difference_type column;
    } result;

    result.line   = lines_seen;
    result.column = cursor - line_start;
    return result;
}
///////////////////////////////////////////////////////////////////////////
/// A comparator for NUL-terminated strings that orders by content rather
/// than by pointer value.
///
/// Characters are compared using `std::char_traits<CharT>`. For `char`
/// this gives the same ordering as `strcmp` (characters are compared as
/// `unsigned char`), and it also works for the wide character types.
///
/// Both arguments must be non-null and NUL-terminated.
///
/// TODO: handle string and string_view objects
struct string_less {
    template <
        typename CharT,
        typename = std::void_t<typename std::char_traits<CharT>::char_type>
    >
    bool operator() (CharT const *a, CharT const *b) const noexcept
    {
        using traits_type = std::char_traits<CharT>;

        // Skip the common prefix. We only need to test `a` for the
        // terminator: if `b` ends first then the characters differ and the
        // equality test stops the loop before we read past the end of `b`.
        while (!traits_type::eq (*a, CharT {}) && traits_type::eq (*a, *b)) {
            ++a;
            ++b;
        }

        // Either the characters differ or both strings have ended. A
        // terminator orders before every other character, so a strict
        // prefix compares less than the longer string.
        return traits_type::lt (*a, *b);
    }
};
}
namespace cruft::string::compare {
///////////////////////////////////////////////////////////////////////////
/// A case comparator that tests equality on a string after a
/// per-character transform is applied.
///
/// Neither string will be modified.
///
/// \tparam TransformV A character transform function
template <char (*TransformV)(char) noexcept>
struct transform {
template <typename ContainerT>
bool operator() (
ContainerT const &a,
ContainerT const &b
) const noexcept {
if (a.size () != b.size ())
return false;
for (auto i = a.begin (), j = b.begin (); i != a.end (); ++i, ++j)
if (TransformV (*i) != TransformV (*j))
return false;
return true;
}
template <
typename CharT,
typename = std::void_t<
typename std::char_traits<CharT>::char_type
>
>
bool operator() (CharT const *a, CharT const *b) const noexcept
{
return (*this) (std::string_view (a), std::string_view (b));
}
};
/// Equality comparator that passes each character through
/// `ascii::try_lower` before comparing. Presumably this gives an ASCII
/// case-insensitive comparison -- confirm against ascii.hpp.
using lower = transform<ascii::try_lower>;
/// Equality comparator that passes each character through
/// `ascii::try_upper` before comparing. Presumably this gives an ASCII
/// case-insensitive comparison -- confirm against ascii.hpp.
using upper = transform<ascii::try_upper>;
}