/* * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * Copyright 2011-2018 Danny Robson */ #pragma once #include "ascii.hpp" #include "debug.hpp" #include "view.hpp" #include #include namespace cruft { std::string to_utf8 (const wchar_t*); std::string to_utf8 (const std::wstring&); } bool strbegins(const char *restrict str, const char *restrict prefix); namespace cruft { /// Provides an iterator interface over a string, breaking at each /// occurence of a specific character. /// /// It is up to the constructor to ensure the lifetime of the string. /// This class simply provides an interface for iteration and doesn't /// concern itself with lifetimes. template struct tokeniser { public: using value_type = typename std::iterator_traits::value_type; using range_type = view; tokeniser (cruft::view _range, value_type _separator): m_range (_range), m_separator (_separator) { ; } tokeniser (const char *_range, char _separator): m_range (_range), m_separator (_separator) { ; } struct iterator : public std::iterator< std::forward_iterator_tag, range_type, std::size_t > { public: iterator operator++ (int) { iterator res(*this); ++*this; return res; } iterator& operator++ (void)& { auto newend = m_range.cend (); if (newend != m_end) { CHECK_EQ (*m_range.cend (), m_separator); newend++; } m_range = { newend, std::find (newend, m_end, m_separator) }; return *this; } iterator operator+ (int count) { return std::next (*this, count); } auto const& operator* (void) const& { return m_range; } auto const* operator-> (void) const& { return &m_range; } bool operator== (const iterator &rhs) const { return m_range == rhs.m_range && m_separator == rhs.m_separator; } bool operator!= (const iterator &rhs) const { return !(*this == rhs); } private: iterator (range_type _range, value_type _separator): m_separator (_separator), m_range { _range.cbegin (), std::find (_range.cbegin (), _range.cend (), _separator) }, m_end (_range.cend ()) { ; } value_type m_separator; range_type m_range; Iterator m_end; friend tokeniser; }; iterator cbegin (void) const { return { m_range, m_separator }; } iterator begin (void) const { return { m_range, m_separator }; } iterator cend (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; } iterator end (void) const { return { { m_range.cend (), m_range.cend () }, m_separator }; } std::size_t size (void) const { return std::count ( m_range.begin (), m_range.end (), m_separator ) + 1u; } bool contains (std::string_view key) const noexcept { for (auto const &i: *this) if (equal (i, key)) return true; return false; } private: const range_type m_range; const value_type m_separator; }; template tokeniser (ContainerT&, typename ContainerT::value_type) -> tokeniser; tokeniser (const char*,char) -> tokeniser; /////////////////////////////////////////////////////////////////////////// template auto make_tokeniser (CharT (&data)[LengthV], CharT separator) { return tokeniser { std::begin (data), std::end (data), separator }; } /////////////////////////////////////////////////////////////////////////// /// Calculate the line and column of an iterator within a view. /// /// Returns an anonymous struct containing the line and column number. /// /// The column number for newline is undefined. However it should never /// return the value {0,-1} and so it should not result in underruns when /// offsetting a pointer using the column index. template auto character_position ( cruft::view data, IteratorT const cursor ) { using difference_type = typename std::iterator_traits::difference_type; difference_type line_count = 0; auto current_line = data.begin (); do { auto next_line = std::find (current_line, data.end (), '\n'); if (next_line == data.end () || next_line > cursor) break; ++line_count; current_line = next_line + 1; } while (1); struct { difference_type line; difference_type column; } val = { .line = line_count, .column = cursor - current_line, }; return val; } /////////////////////////////////////////////////////////////////////////// /// A comparator for string-like objects that uses strcmp rather than /// pointer comparison. /// /// TODO: handle string and string_view objects struct string_less { template < typename CharT, typename = std::void_t::char_type> > bool operator() (CharT const *a, CharT const *b) const noexcept { return strcmp (a, b) < 0; } }; } namespace cruft::string::compare { /////////////////////////////////////////////////////////////////////////// /// A case comparator that tests equality on a string after a /// per-character transform is applied. /// /// Neither string will be modified. /// /// \tparam TransformV A character transform function template struct transform { template < typename CharT, typename = std::void_t< typename std::char_traits::char_type > > bool operator() (cruft::view a, cruft::view b) { if (a.size () != b.size ()) return false; for (auto i = a.begin (), j = b.begin (); i != a.end (); ++i, ++j) if (TransformV (*i) != TransformV (*j)) return false; return true; } template < typename CharT, typename = std::void_t< typename std::char_traits::char_type > > bool operator() (CharT const *a, CharT const *b) const noexcept { auto const *i = a; auto const *j = b; for ( ; *i && *j; ++i, ++j) if (TransformV (*i) != TransformV (*j)) return false; // Ensure we've reached the ends of both strings return !*i && !*j; } }; using lower = transform; using upper = transform; }