/* * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * Copyright 2011-2018 Danny Robson */ #pragma once #include "ascii.hpp" #include "debug/assert.hpp" #include "view.hpp" #include #include namespace cruft { std::string to_utf8 (const wchar_t*); std::string to_utf8 (const std::wstring&); /// Apply a functor to each character of a string and return the result. template std::string transform (std::string &&val, FunctionT &&func) { std::transform ( std::begin (val), std::end (val), std::begin (val), std::forward (func) ); return std::move (val); } /// Apply a functor to each character of a string and return the result. template std::string transform (std::string_view val, FunctionT &&func) { std::string res; res.reserve (val.size ()); std::transform ( std::begin (val), std::end (val), std::back_inserter (res), std::forward (func) ); return res; } inline std::string to_upper (std::string_view const &val) { return transform (val, ::toupper); } template inline std::string to_upper (char const (&val)[N]) { return to_upper (std::string_view (val)); } /// Convert the provided string to all upper case inline std::string to_upper (std::string &&val) { return transform (std::move (val), ::toupper); } /// Convert the provided string to all upper case inline std::string to_upper (std::string const &val) { return transform (val, ::toupper); } /// Convert the provided string to all upper case inline std::string to_lower (std::string &&val) { return transform (std::move (val), ::tolower); } /// Convert the provided string to all upper case inline std::string to_lower (std::string const &val) { return transform (val, ::tolower); } inline std::string_view lstrip (std::string_view str) { auto const pos = std::find_if_not (std::begin (str), std::end (str), ascii::is_space); return { pos, std::end (str) }; } inline std::string_view rstrip (std::string_view str) { auto const pos = std::find_if_not (std::rbegin (str), std::rend (str), ascii::is_space); return { str.begin (), pos.base () }; } inline std::string_view strip (std::string_view str) { return lstrip (rstrip (str)); } } bool strbegins(const char *restrict str, const char *restrict prefix); namespace cruft { /// Provides an iterator interface over a string, breaking at each /// occurence of a specific character. /// /// It is up to the constructor to ensure the lifetime of the string. /// This class simply provides an interface for iteration and doesn't /// concern itself with lifetimes. template struct tokeniser { public: using value_type = view; using element_type = typename std::iterator_traits::value_type; tokeniser (cruft::view _range, element_type _separator): m_range (_range), m_separator (_separator) { ; } tokeniser (const char *_range, char _separator): m_range (_range), m_separator (_separator) { ; } struct iterator { public: using iterator_category = std::forward_iterator_tag; using value_type = tokeniser::value_type; using difference_type = std::iter_difference_t; using pointer = value_type*; using reference = value_type&; iterator operator++ (int) { iterator res(*this); ++*this; return res; } iterator& operator++ (void)& { if (m_data.end () == m_end) { m_data = { m_data.end (), m_data.end () }; m_base = m_end; return *this; } m_base = m_data.end (); m_data = { m_data.end () + 1, std::find (m_data.end () + 1, m_end, m_separator) }; return *this; } iterator operator+ (int count) { return std::next (*this, count); } auto const& operator* (void) const& { return m_data; } auto operator-> (void) const& { return &m_data; } bool operator== (const iterator &rhs) const { CHECK_EQ (m_separator, rhs.m_separator); return m_base == rhs.m_base; } bool operator!= (iterator const &rhs) const { return !(*this == rhs); } iterator (value_type _range, element_type _separator) : m_separator (_separator) , m_base (_range.begin ()) , m_data (_range.begin (), std::find (_range.begin (), _range.end (), _separator)) , m_end (_range.end ()) { ; } private: element_type m_separator; Iterator m_base; view m_data; Iterator m_end; }; iterator cbegin (void) const { return { m_range, m_separator }; } iterator begin (void) const { return { m_range, m_separator }; } iterator cend (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; } iterator end (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; } std::size_t size (void) const { return std::count ( m_range.begin (), m_range.end (), m_separator ) + 1u; } bool contains (std::string_view key) const noexcept { for (auto const &i: *this) if (equal (i, key)) return true; return false; } private: const value_type m_range; const element_type m_separator; }; template tokeniser (ContainerT&, typename ContainerT::value_type) -> tokeniser; tokeniser (const char*,char) -> tokeniser; /////////////////////////////////////////////////////////////////////////// template auto make_tokeniser (CharT (&data)[LengthV], CharT separator) { return tokeniser { std::begin (data), std::end (data), separator }; } /////////////////////////////////////////////////////////////////////////// /// Calculate the line and column of an iterator within a view. /// /// Returns an anonymous struct containing the line and column number. /// /// The column number for newline is undefined. However it should never /// return the value {0,-1} and so it should not result in underruns when /// offsetting a pointer using the column index. template auto character_position ( cruft::view data, IteratorT const cursor ) { using difference_type = typename std::iterator_traits::difference_type; difference_type line_count = 0; auto current_line = data.begin (); do { auto next_line = std::find (current_line, data.end (), '\n'); if (next_line == data.end () || next_line > cursor) break; ++line_count; current_line = next_line + 1; } while (1); struct { difference_type line; difference_type column; } val = { .line = line_count, .column = cursor - current_line, }; return val; } /////////////////////////////////////////////////////////////////////////// /// A comparator for string-like objects that uses strcmp rather than /// pointer comparison. /// /// TODO: handle string and string_view objects struct string_less { template < typename CharT, typename = std::void_t::char_type> > bool operator() (CharT const *a, CharT const *b) const noexcept { return strcmp (a, b) < 0; } }; } namespace cruft::string { template struct less { bool operator() (char const *a, char const *b) const noexcept { for (; *a && *b; ++a, ++b) { auto const &a_val = TransformV (*a); auto const &b_val = TransformV (*b); if (a_val < b_val) return true; if (a_val > b_val) return false; } return *a == '\0' && *b != '\0'; } bool operator() (std::string const &a, std::string const & b) const noexcept { return (*this) (std::string_view (a), std::string_view (b)); } bool operator() (std::string_view a, std::string_view b) const noexcept { auto a_cursor = a.begin (); auto b_cursor = b.begin (); for ( ; a_cursor != a.end () && b_cursor != b.end (); ++a_cursor, ++b_cursor) { auto const &a_val = TransformV (*a_cursor); auto const &b_val = TransformV (*b_cursor); if (a_val < b_val) return true; if (a_val > b_val) return false; } if (a_cursor == a.end () && b_cursor != b.end ()) return true; return false; } using is_transparent = void; template requires (!std::is_same_v) bool operator() (A &&a, B &&b) const noexcept { return (*this) ( std::string_view (a), std::string_view (b) ); } }; using less_lower = less; } namespace cruft::string::equality { /////////////////////////////////////////////////////////////////////////// /// A case comparator that tests equality on a string after a /// per-character transform is applied. /// /// Neither string will be modified. /// /// \tparam TransformV A character transform function template struct transform { static bool operator() ( std::string_view const &a, std::string_view const &b ) noexcept { if (a.size () != b.size ()) return false; for (auto i = a.begin (), j = b.begin (); i != a.end (); ++i, ++j) if (TransformV (*i) != TransformV (*j)) return false; return true; } }; using lower = transform; using upper = transform; }