2011-08-12 00:25:59 +10:00
|
|
|
/*
|
2018-08-04 15:14:06 +10:00
|
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
2011-08-12 00:25:59 +10:00
|
|
|
*
|
2018-10-16 18:01:31 +11:00
|
|
|
* Copyright 2011-2018 Danny Robson <danny@nerdcruft.net>
|
2011-08-12 00:25:59 +10:00
|
|
|
*/
|
|
|
|
|
2018-10-16 18:01:31 +11:00
|
|
|
#pragma once
|
2011-08-12 00:25:59 +10:00
|
|
|
|
2019-05-02 11:03:22 +10:00
|
|
|
#include "ascii.hpp"
#include "debug/assert.hpp"
#include "view.hpp"

#include <algorithm>
#include <iterator>
#include <string>
#include <string_view>
#include <type_traits>
|
|
|
|
|
|
|
|
|
2018-08-05 14:42:02 +10:00
|
|
|
namespace cruft {
|
2016-11-22 21:48:57 +11:00
|
|
|
/// Declaration only; the definition is not in this header.
/// NOTE(review): presumably converts a NUL-terminated wide string to a
/// UTF-8 encoded std::string -- confirm against the implementation.
std::string to_utf8 (const wchar_t*);
|
|
|
|
/// Declaration only; the definition is not in this header.
/// NOTE(review): presumably the std::wstring counterpart of the
/// `const wchar_t*` overload -- confirm against the implementation.
std::string to_utf8 (const std::wstring&);
|
2019-11-26 07:49:23 +11:00
|
|
|
|
|
|
|
|
2019-11-26 08:10:04 +11:00
|
|
|
/// Rewrite every character of an expiring string through `func` and hand
/// the same storage back to the caller.
///
/// \param val   string to rewrite; its buffer is reused for the result
/// \param func  callable invoked once per character; its return value is
///              assigned back over that character
/// \return the rewritten string, move-constructed from `val`
template <typename FunctionT>
std::string
transform (std::string &&val, FunctionT &&func)
{
    auto const first = val.begin ();
    auto const last  = val.end ();

    // The input and output ranges coincide, so the characters are
    // overwritten in place.
    std::transform (first, last, first, std::forward<FunctionT> (func));

    // `val` is a named rvalue reference (hence an lvalue expression), so an
    // explicit move is needed to avoid copying into the return value.
    return std::move (val);
}
|
|
|
|
|
|
|
|
|
2019-11-26 08:10:04 +11:00
|
|
|
/// Build a new string by passing every character of `val` through `func`.
///
/// The input string is left untouched.
///
/// \param val   string to read from
/// \param func  callable invoked once per character, in order
/// \return a string of the same length holding the results of `func`
template <typename FunctionT>
std::string
transform (std::string const &val, FunctionT &&func)
{
    // Size the destination up front so the results can be written directly
    // into place.
    std::string res (val.size (), '\0');

    std::transform (
        val.begin (),
        val.end (),
        res.begin (),
        std::forward<FunctionT> (func)
    );

    return res;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Convert the provided string to all upper case
|
|
|
|
inline std::string
|
|
|
|
to_upper (std::string &&val)
|
|
|
|
{
|
|
|
|
return transform (std::move (val), ::toupper);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Convert the provided string to all upper case
|
|
|
|
inline std::string
|
|
|
|
to_upper (std::string const &val)
|
|
|
|
{
|
|
|
|
return transform (val, ::toupper);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Convert the provided string to all upper case
|
|
|
|
inline std::string
|
|
|
|
to_lower (std::string &&val)
|
|
|
|
{
|
|
|
|
return transform (std::move (val), ::tolower);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Convert the provided string to all upper case
|
|
|
|
inline std::string
|
|
|
|
to_lower (std::string const &val)
|
|
|
|
{
|
|
|
|
return transform (val, ::tolower);
|
2019-11-26 07:49:23 +11:00
|
|
|
}
|
2016-11-22 21:48:57 +11:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-08-12 00:25:59 +10:00
|
|
|
/// Declaration only; the definition is not in this header.
/// NOTE(review): presumably reports whether `str` starts with `prefix` --
/// confirm against the implementation. `restrict` is not a standard C++
/// keyword, so it is presumably supplied by a macro or compiler extension.
bool
|
|
|
|
strbegins(const char *restrict str,
|
|
|
|
const char *restrict prefix);
|
|
|
|
|
2016-03-17 18:13:19 +11:00
|
|
|
|
2018-08-05 14:42:02 +10:00
|
|
|
namespace cruft {
|
2019-02-20 15:51:54 +11:00
|
|
|
/// Provides an iterator interface over a string, breaking at each
|
|
|
|
/// occurence of a specific character.
|
|
|
|
///
|
|
|
|
/// It is up to the constructor to ensure the lifetime of the string.
|
|
|
|
/// This class simply provides an interface for iteration and doesn't
|
|
|
|
/// concern itself with lifetimes.
|
2016-03-18 11:08:12 +11:00
|
|
|
template <typename Iterator>
|
2016-03-17 18:13:19 +11:00
|
|
|
struct tokeniser {
|
|
|
|
public:
|
2019-06-20 16:31:22 +10:00
|
|
|
using value_type = view<Iterator>;
|
|
|
|
using element_type = typename std::iterator_traits<Iterator>::value_type;
|
2016-03-17 18:13:19 +11:00
|
|
|
|
2019-06-20 16:31:22 +10:00
|
|
|
tokeniser (cruft::view<Iterator,Iterator> _range, element_type _separator):
|
2018-01-30 11:31:40 +11:00
|
|
|
m_range (_range),
|
|
|
|
m_separator (_separator)
|
|
|
|
{ ; }
|
2016-03-17 18:13:19 +11:00
|
|
|
|
2018-01-30 11:31:40 +11:00
|
|
|
tokeniser (const char *_range, char _separator):
|
|
|
|
m_range (_range),
|
|
|
|
m_separator (_separator)
|
2017-09-15 15:22:29 +10:00
|
|
|
{ ; }
|
|
|
|
|
2019-02-20 15:51:54 +11:00
|
|
|
|
2017-09-15 15:22:29 +10:00
|
|
|
struct iterator : public std::iterator<
|
|
|
|
std::forward_iterator_tag,
|
2019-06-20 16:31:22 +10:00
|
|
|
value_type,
|
2017-09-15 15:22:29 +10:00
|
|
|
std::size_t
|
|
|
|
> {
|
2016-03-17 18:13:19 +11:00
|
|
|
public:
|
2018-01-30 11:31:40 +11:00
|
|
|
iterator operator++ (int)
|
|
|
|
{
|
|
|
|
iterator res(*this);
|
|
|
|
++*this;
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
iterator& operator++ (void)&
|
|
|
|
{
|
|
|
|
auto newend = m_range.cend ();
|
|
|
|
if (newend != m_end) {
|
|
|
|
CHECK_EQ (*m_range.cend (), m_separator);
|
|
|
|
newend++;
|
|
|
|
}
|
|
|
|
|
|
|
|
m_range = {
|
|
|
|
newend,
|
|
|
|
std::find (newend, m_end, m_separator)
|
|
|
|
};
|
|
|
|
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
|
2018-04-01 14:46:32 +10:00
|
|
|
iterator operator+ (int count)
|
|
|
|
{
|
2018-05-08 21:49:27 +10:00
|
|
|
return std::next (*this, count);
|
2018-04-01 14:46:32 +10:00
|
|
|
}
|
|
|
|
|
2019-02-20 15:51:54 +11:00
|
|
|
auto const& operator* (void) const& { return m_range; }
|
|
|
|
auto const* operator-> (void) const& { return &m_range; }
|
2018-04-01 14:44:43 +10:00
|
|
|
|
2018-01-30 11:31:40 +11:00
|
|
|
bool operator== (const iterator &rhs) const
|
|
|
|
{
|
|
|
|
return m_range == rhs.m_range && m_separator == rhs.m_separator;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool operator!= (const iterator &rhs) const
|
|
|
|
{
|
|
|
|
return !(*this == rhs);
|
|
|
|
}
|
2016-03-17 18:13:19 +11:00
|
|
|
|
|
|
|
private:
|
2019-06-20 16:31:22 +10:00
|
|
|
iterator (value_type _range, element_type _separator):
|
2018-01-30 11:31:40 +11:00
|
|
|
m_separator (_separator),
|
2019-02-20 15:51:54 +11:00
|
|
|
m_range {
|
|
|
|
_range.cbegin (),
|
|
|
|
std::find (_range.cbegin (), _range.cend (), _separator)
|
|
|
|
},
|
2018-01-30 11:31:40 +11:00
|
|
|
m_end (_range.cend ())
|
|
|
|
{ ; }
|
2016-03-18 11:08:12 +11:00
|
|
|
|
2019-06-20 16:31:22 +10:00
|
|
|
element_type m_separator;
|
|
|
|
value_type m_range;
|
2016-03-18 11:08:12 +11:00
|
|
|
Iterator m_end;
|
|
|
|
|
|
|
|
friend tokeniser;
|
2016-03-17 18:13:19 +11:00
|
|
|
};
|
|
|
|
|
2019-02-20 15:51:54 +11:00
|
|
|
|
2018-01-30 11:31:40 +11:00
|
|
|
iterator cbegin (void) const { return { m_range, m_separator }; }
|
|
|
|
iterator begin (void) const { return { m_range, m_separator }; }
|
2016-03-17 18:13:19 +11:00
|
|
|
|
2018-01-30 11:31:40 +11:00
|
|
|
iterator cend (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; }
|
|
|
|
iterator end (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; }
|
2016-03-17 18:13:19 +11:00
|
|
|
|
2018-07-18 15:19:35 +10:00
|
|
|
std::size_t
|
|
|
|
size (void) const
|
|
|
|
{
|
|
|
|
return std::count (
|
|
|
|
m_range.begin (),
|
|
|
|
m_range.end (),
|
|
|
|
m_separator
|
|
|
|
) + 1u;
|
|
|
|
}
|
|
|
|
|
2019-02-20 14:44:33 +11:00
|
|
|
bool contains (std::string_view key) const noexcept
|
|
|
|
{
|
2019-02-20 15:51:54 +11:00
|
|
|
for (auto const &i: *this)
|
|
|
|
if (equal (i, key))
|
|
|
|
return true;
|
|
|
|
|
|
|
|
return false;
|
2019-02-20 14:44:33 +11:00
|
|
|
}
|
|
|
|
|
2016-03-17 18:13:19 +11:00
|
|
|
private:
|
2019-06-20 16:31:22 +10:00
|
|
|
const value_type m_range;
|
|
|
|
const element_type m_separator;
|
2016-03-17 18:13:19 +11:00
|
|
|
};
|
2016-03-18 11:08:12 +11:00
|
|
|
|
2017-09-15 15:22:29 +10:00
|
|
|
|
2018-01-30 11:31:40 +11:00
|
|
|
/// Deduction guide: constructing a tokeniser from an lvalue container and a
/// separator of its `value_type` selects `tokeniser<ContainerT::iterator>`.
template <typename ContainerT>
|
|
|
|
tokeniser (ContainerT&, typename ContainerT::value_type) -> tokeniser<typename ContainerT::iterator>;
|
|
|
|
|
|
|
|
/// Deduction guide: constructing a tokeniser from a C string and a `char`
/// separator selects `tokeniser<const char*>`.
tokeniser (const char*,char) -> tokeniser<const char*>;
|
|
|
|
|
2017-09-15 15:22:29 +10:00
|
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
template <typename CharT, std::size_t LengthV>
|
|
|
|
auto
|
|
|
|
make_tokeniser (CharT (&data)[LengthV], CharT separator)
|
|
|
|
{
|
|
|
|
return tokeniser { std::begin (data), std::end (data), separator };
|
|
|
|
}
|
2016-03-17 18:13:19 +11:00
|
|
|
|
2011-08-12 00:25:59 +10:00
|
|
|
|
2018-10-16 18:01:31 +11:00
|
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
/// Calculate the line and column of an iterator within a view.
|
2018-10-17 11:48:03 +11:00
|
|
|
///
|
|
|
|
/// Returns an anonymous struct containing the line and column number.
|
|
|
|
///
|
|
|
|
/// The column number for newline is undefined. However it should never
|
|
|
|
/// return the value {0,-1} and so it should not result in underruns when
|
|
|
|
/// offsetting a pointer using the column index.
|
2018-10-16 18:01:31 +11:00
|
|
|
template <typename IteratorT>
|
|
|
|
auto
|
|
|
|
character_position (
|
|
|
|
cruft::view<IteratorT> data,
|
|
|
|
IteratorT const cursor
|
|
|
|
) {
|
|
|
|
using difference_type = typename std::iterator_traits<IteratorT>::difference_type;
|
|
|
|
difference_type line_count = 0;
|
|
|
|
auto current_line = data.begin ();
|
|
|
|
|
|
|
|
do {
|
|
|
|
auto next_line = std::find (current_line, data.end (), '\n');
|
|
|
|
if (next_line == data.end () || next_line > cursor)
|
|
|
|
break;
|
|
|
|
|
|
|
|
++line_count;
|
|
|
|
current_line = next_line + 1;
|
|
|
|
} while (1);
|
|
|
|
|
|
|
|
struct {
|
|
|
|
difference_type line;
|
|
|
|
difference_type column;
|
|
|
|
} val = {
|
|
|
|
.line = line_count,
|
|
|
|
.column = cursor - current_line,
|
|
|
|
};
|
|
|
|
|
|
|
|
return val;
|
|
|
|
}
|
2019-05-01 12:38:55 +10:00
|
|
|
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////
/// A comparator for NUL-terminated strings that orders by the characters
/// pointed to rather than by pointer value.
///
/// Comparison is performed through `std::basic_string_view<CharT>`, ie
/// lexicographically using `std::char_traits<CharT>`. For `char` this
/// yields the same ordering as `strcmp (a, b) < 0`, and it also works for
/// the other character types.
///
/// TODO: handle string and string_view objects
struct string_less {
    /// \param a  NUL-terminated string; must not be null
    /// \param b  NUL-terminated string; must not be null
    /// \return true if `a` orders strictly before `b`
    template <
        typename CharT,
        typename = std::void_t<typename std::char_traits<CharT>::char_type>
    >
    bool operator() (CharT const *a, CharT const *b) const noexcept
    {
        return std::basic_string_view<CharT> (a) < std::basic_string_view<CharT> (b);
    }
};
|
2018-10-16 18:01:31 +11:00
|
|
|
}
|
2019-05-02 11:03:22 +10:00
|
|
|
|
|
|
|
|
|
|
|
namespace cruft::string::compare {
|
|
|
|
///////////////////////////////////////////////////////////////////////////
|
|
|
|
/// A case comparator that tests equality on a string after a
|
|
|
|
/// per-character transform is applied.
|
|
|
|
///
|
|
|
|
/// Neither string will be modified.
|
|
|
|
///
|
|
|
|
/// \tparam TransformV A character transform function
|
|
|
|
template <char (*TransformV)(char) noexcept>
|
|
|
|
struct transform {
|
2019-10-10 15:12:15 +11:00
|
|
|
template <typename ContainerT>
|
|
|
|
bool operator() (
|
|
|
|
ContainerT const &a,
|
|
|
|
ContainerT const &b
|
|
|
|
) const noexcept {
|
2019-05-02 11:03:22 +10:00
|
|
|
if (a.size () != b.size ())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
for (auto i = a.begin (), j = b.begin (); i != a.end (); ++i, ++j)
|
|
|
|
if (TransformV (*i) != TransformV (*j))
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
template <
|
|
|
|
typename CharT,
|
|
|
|
typename = std::void_t<
|
|
|
|
typename std::char_traits<CharT>::char_type
|
|
|
|
>
|
|
|
|
>
|
|
|
|
bool operator() (CharT const *a, CharT const *b) const noexcept
|
|
|
|
{
|
2019-10-10 15:12:15 +11:00
|
|
|
return (*this) (std::string_view (a), std::string_view (b));
|
2019-05-02 11:03:22 +10:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
    /// Equality comparator that applies `ascii::try_lower` to each character
    /// of both operands before comparing.
    using lower = transform<ascii::try_lower>;
|
|
|
|
    /// Equality comparator that applies `ascii::try_upper` to each character
    /// of both operands before comparing.
    using upper = transform<ascii::try_upper>;
|
|
|
|
}
|