libcruft-util/cruft/util/uri.hpp

177 lines
5.8 KiB
C++
Raw Permalink Normal View History

2015-02-11 16:18:18 +11:00
/*
2018-08-04 15:14:06 +10:00
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
2015-02-11 16:18:18 +11:00
*
* Copyright 2015, 2017, 2021 Danny Robson <danny@nerdcruft.net>
2015-02-11 16:18:18 +11:00
*/
2021-12-13 16:55:14 +11:00
#pragma once
2015-02-09 17:43:24 +11:00
#include "debug/assert.hpp"
#include "view.hpp"
#include <fmt/core.h>
#include <array>
2021-12-13 16:55:01 +11:00
#include <iosfwd>
#include <map>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
2015-02-09 17:43:24 +11:00
namespace cruft {
// parsing of rfc3986 uniform resource identifiers
//
// does not currently perform normalisation (scheme or protocol),
// comparison, or other associated operations. though these should be
// added in the future.
//
// note that the parsed results may not always conform to expectations
// for some protocols. eg, mailto identifiers are complex to parse
// reliably and would require a specialised parser to be reliable.
//
// not all fields will be present for all protocols (or all instances of
// any given protocol). eg, "tel" URIs are unlikely to have port numbers.
2015-02-09 17:43:24 +11:00
class uri {
public:
2022-02-15 14:20:33 +11:00
explicit uri (std::string &&);
uri (uri &&) noexcept = default;
uri& operator= (uri &&) noexcept = default;
uri (uri const&) = default;
uri& operator= (uri const&) = default;
2022-02-15 14:20:33 +11:00
explicit uri (const std::string&);
explicit uri (const char *);
explicit uri (view<const char *>);
2015-02-09 17:43:24 +11:00
uri (
std::string_view scheme,
std::string_view authority,
std::string_view path,
std::string_view query,
std::string_view fragment
);
2015-02-09 17:43:24 +11:00
class parse_error : public std::runtime_error
{ using runtime_error::runtime_error; };
// URI: 'https://user:password@example.com:80/path/to?foo=bar#fragment'
//
// SCHEME: 'https'
// HIERARCHICAL: 'user:password@example.com:80/path/to'
// AUTHORITY: 'user:password@example.com:80'
// USER: 'user:password'
// HOST: 'example.com'
// PORT: '80'
// PATH: '/path/to'
// QUERY: 'foo=bar'
// FRAGMENT: 'fragment'
enum component {
/* 0 */ SCHEME,
/* 1 */ USER,
/* 2 */ HOST,
/* 3 */ PORT,
/* 4 */ PATH,
/* 5 */ QUERY,
/* 6 */ FRAGMENT,
2015-02-09 17:43:24 +11:00
NUM_COMPONENTS
};
std::string_view
get (component c) const&;
std::string_view all (void) const& { return m_value; }
2021-11-07 12:13:15 +11:00
std::string const& value (void) const& { return m_value; }
std::string_view scheme (void) const& { return get (SCHEME); }
std::string_view user (void) const& { return get (USER); }
std::string_view host (void) const& { return get (HOST); }
std::string_view port (void) const& { return get (PORT); }
std::string_view path (void) const& { return get (PATH); }
std::string_view query (void) const& { return get (QUERY); }
std::string_view fragment (void) const& { return get (FRAGMENT); }
std::string_view heirarchical (void) const&; //{ return { user ().begin (), path ().end () }; }
std::string_view authority (void) const&; //{ return { user ().begin (), port ().end () }; }
2022-07-19 17:03:39 +10:00
/// Returns a view over the path and query components
std::string_view
pq (void) const&
2022-02-25 11:52:09 +11:00
{
return {
m_value.data () + m_offsets[PATH].first,
m_value.data () + m_offsets[QUERY].second
2022-02-25 11:52:09 +11:00
};
}
2022-07-19 17:03:39 +10:00
/// Returns a view over the path, query, and fragment components
2022-02-16 15:09:58 +11:00
std::string_view
pqf (void) const& {
return {
m_value.data () + m_offsets[PATH].first,
m_value.data () + m_offsets[FRAGMENT].second
2022-02-16 15:09:58 +11:00
};
}
void set (component c, std::string_view val);
// void clear (component);
void clear_fragment (void);
2021-12-13 16:56:18 +11:00
std::array<std::string_view, NUM_COMPONENTS>
components (void) const& noexcept;
2015-02-09 17:43:24 +11:00
static std::string percent_decode (view<const char*>);
2015-02-09 17:43:24 +11:00
private:
void parse (void);
std::array<std::pair<int, int>, NUM_COMPONENTS> m_offsets;
2015-02-09 17:43:24 +11:00
std::string m_value;
};
/// Split a query string into its key-value pairs.
///
/// The same key may appear more than once in the result. A vector is
/// returned (rather than a map) so that the original ordering of the pairs
/// can be preserved.
///
/// This is the inverse of vector_to_query.
auto query_to_vector (std::string_view query)
    -> std::vector<std::pair<std::string, std::string>>;
/// Build a query string from a sequence of key-value pairs.
///
/// Every pair is serialised, duplicates included, in exactly the order
/// supplied by the caller.
///
/// This is the inverse of query_to_vector.
auto vector_to_query (std::vector<std::pair<std::string, std::string>> const &pairs)
    -> std::string;
bool operator== (uri const&, uri const&) noexcept;
/// Produce a new uri from `child` interpreted relative to `base`.
/// NOTE(review): presumably follows RFC 3986 reference resolution; confirm
/// against the definition, which is not part of this header.
cruft::uri resolve (const cruft::uri &base, const cruft::uri &child);
/// Return a normalised copy of `value`; the argument is not modified.
/// NOTE(review): the exact normalisation steps are decided by the
/// definition, which is not part of this header.
cruft::uri normalise (const cruft::uri &value);
2021-12-13 16:56:28 +11:00
2021-12-13 16:55:01 +11:00
std::ostream& operator<< (std::ostream&, uri const&);
2015-02-09 17:43:24 +11:00
std::ostream& operator<< (std::ostream&, uri::component);
}
// fmtlib formatting support for cruft::uri.
//
// The base formatter is parameterised on fmt::string_view, not
// std::string_view: fmtlib reimplements the type as fmt::string_view.
// No parse member is declared here, so the one from the base is used.
template <>
struct fmt::formatter<cruft::uri> : fmt::formatter<fmt::string_view> {
    fmt::format_context::iterator
    format (cruft::uri const &value, fmt::format_context &ctx) const;
};
// fmtlib formatting support for cruft::uri::component.
//
// The base formatter is parameterised on fmt::string_view, not
// std::string_view: fmtlib reimplements the type as fmt::string_view.
template <>
struct fmt::formatter<cruft::uri::component> : public fmt::formatter<fmt::string_view> {
    // parse is inherited from the string_view formatter; only format is
    // declared for this specialisation.
    fmt::format_context::iterator
    format (cruft::uri::component c, fmt::format_context &ctx) const;
};