2021-12-14 12:13:50 +11:00
|
|
|
#include "./uri.hpp"
|
|
|
|
|
2021-12-17 11:46:59 +11:00
|
|
|
#include "./string.hpp"
|
|
|
|
|
2021-12-14 12:13:50 +11:00
|
|
|
#include "./debug/panic.hpp"
|
|
|
|
|
2021-12-17 11:46:59 +11:00
|
|
|
#include <ostream>
|
|
|
|
|
2021-12-14 12:13:50 +11:00
|
|
|
using cruft::uri;
|
|
|
|
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
2021-12-21 14:56:28 +11:00
|
|
|
#include <cruft/util/debug/assert.hpp>
|
|
|
|
#include <cruft/util/debug/warn.hpp>
|
|
|
|
#include <cruft/util/iterator/zip.hpp>
|
|
|
|
|
|
|
|
#include <optional>
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
using uric = cruft::uri::component;
|
|
|
|
|
|
|
|
|
|
|
|
struct {
|
|
|
|
cruft::uri::component self;
|
|
|
|
// prev sibling
|
|
|
|
std::optional<cruft::uri::component> prev;
|
|
|
|
// parent
|
|
|
|
std::optional<cruft::uri::component> parent;
|
|
|
|
} COMPONENT_ORDER[cruft::uri::NUM_COMPONENTS] {
|
|
|
|
{ uric::SCHEME, { }, {} },
|
|
|
|
{ uric::HIERARCHICAL, {uric::SCHEME }, {} },
|
|
|
|
{ uric::QUERY, {uric::HIERARCHICAL}, {} },
|
|
|
|
{ uric::FRAGMENT, {uric::QUERY }, {} },
|
|
|
|
|
|
|
|
{ uric::AUTHORITY, { }, {uric::HIERARCHICAL} },
|
|
|
|
{ uric::PATH, {uric::AUTHORITY}, {uric::HIERARCHICAL} },
|
|
|
|
|
|
|
|
{ uric::USER, { }, {uric::AUTHORITY} },
|
|
|
|
{ uric::HOST, {uric::USER}, {uric::AUTHORITY} },
|
|
|
|
{ uric::PORT, {uric::HOST}, {uric::AUTHORITY} },
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
cruft::uri::uri (std::string &&_value):
|
|
|
|
m_views {
|
|
|
|
nullptr,
|
|
|
|
nullptr,
|
|
|
|
nullptr,
|
|
|
|
nullptr,
|
|
|
|
nullptr,
|
|
|
|
nullptr,
|
|
|
|
nullptr,
|
|
|
|
nullptr,
|
|
|
|
nullptr
|
|
|
|
},
|
|
|
|
m_value (std::move (_value))
|
|
|
|
{
|
|
|
|
parse ();
|
|
|
|
|
|
|
|
for (auto const &order: COMPONENT_ORDER) {
|
|
|
|
if (m_views[order.self].data ())
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (order.prev)
|
|
|
|
m_views[order.self] = { m_views[*order.prev].end (), 0 };
|
|
|
|
else if (order.parent)
|
|
|
|
m_views[order.self] = { m_views[*order.parent].begin (), 0 };
|
|
|
|
else
|
|
|
|
m_views[order.self] = { m_value.data (), 0 };
|
|
|
|
}
|
|
|
|
|
|
|
|
CHECK_SANITY (*this);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
2021-12-14 12:13:50 +11:00
|
|
|
// Construct from a C string by copying it into a std::string and delegating
// to the owning constructor.
cruft::uri::uri (const char *str)
    : uri (std::string (str))
{ }
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Construct from a character view by copying the viewed range into a
// std::string and delegating to the owning constructor.
uri::uri (cruft::view<const char *> _value)
    : uri (std::string (_value.begin (), _value.end ()))
{ }
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Construct from an lvalue string: take a copy and delegate to the owning
// constructor.
uri::uri (const std::string &_value)
    : uri (std::string (_value))
{ }
|
|
|
|
|
|
|
|
|
2021-12-14 12:31:20 +11:00
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Copy constructor.
//
// The views are copied verbatim and therefore initially refer to `rhs`'s
// string; they are then shifted by the distance between the two buffers so
// that they refer to the same positions within our own copy.
uri::uri (uri const &rhs)
    : m_views (rhs.m_views)
    , m_value (rhs.m_value)
{
    auto const distance = rhs.m_value.data () - m_value.data ();

    for (auto &view: m_views)
        view -= distance;

    CHECK_SANITY (*this);
}
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Move assignment.
//
// Moving a std::string does not guarantee the characters stay at the same
// address: a string held in the small-string buffer is copied into the
// destination object. The component views must therefore be rebased onto our
// own buffer after the move, exactly as the copy constructor does, otherwise
// they are left pointing into `rhs`.
uri& uri::operator= (uri &&rhs) noexcept
{
    // Self-move would leave m_value in an unspecified state.
    if (this == &rhs)
        return *this;

    // Remember where the views currently point before the buffer can move.
    auto const source = rhs.m_value.data ();

    m_views = std::move (rhs.m_views);
    m_value = std::move (rhs.m_value);

    // Zero when the heap buffer was simply stolen; non-zero when the
    // characters were relocated.
    auto const offset = source - m_value.data ();
    if (offset != 0)
        for (auto &i: m_views)
            i -= offset;

    return *this;
}
|
|
|
|
|
|
|
|
|
2021-12-17 11:46:59 +11:00
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Recompose a URI string from its five top-level components.
//
// Empty components are omitted together with their delimiter:
//   * scheme    is followed by ":"
//   * authority is preceded by "//"
//   * path      is always appended as-is
//   * query     is preceded by "?"
//   * fragment  is preceded by "#"
//
// Returns the concatenated string; no validation or escaping is performed.
static std::string
combine_components (
    std::string_view scheme,
    std::string_view authority,
    std::string_view path,
    std::string_view query,
    std::string_view fragment
) {
    // Delimiters are kept as string_view constants so their lengths are
    // known at compile time and no C string routines are required.
    constexpr std::string_view scheme_suffix    = ":";
    constexpr std::string_view authority_prefix = "//";
    constexpr std::string_view query_prefix     = "?";
    constexpr std::string_view fragment_prefix  = "#";

    std::string res;

    // Upper bound on the result length: every component plus every delimiter.
    res.reserve (
        scheme.size ()    + scheme_suffix.size () +
        authority.size () + authority_prefix.size () +
        path.size () +
        query.size ()     + query_prefix.size () +
        fragment.size ()  + fragment_prefix.size ()
    );

    if (!scheme.empty ()) {
        res += scheme;
        res += scheme_suffix;
    }

    if (!authority.empty ()) {
        res += authority_prefix;
        res += authority;
    }

    res += path;

    if (!query.empty ()) {
        res += query_prefix;
        res += query;
    }

    if (!fragment.empty ()) {
        res += fragment_prefix;
        res += fragment;
    }

    return res;
}
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Construct from individual components: the pieces are joined into a single
// string by combine_components and handed to the owning constructor, which
// parses the result.
uri::uri (
    std::string_view scheme,
    std::string_view authority,
    std::string_view path,
    std::string_view query,
    std::string_view fragment
)
    : uri (
        combine_components (
            scheme,
            authority,
            path,
            query,
            fragment
        )
    )
{ }
|
|
|
|
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
std::string_view
|
|
|
|
uri::get (component c) const&
|
|
|
|
{
|
|
|
|
CHECK_INDEX (c, NUM_COMPONENTS);
|
|
|
|
return { m_views[c].data (), m_views[c].size () };
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
void
|
|
|
|
uri::set (component c, std::string_view val)
|
|
|
|
{
|
|
|
|
auto const diff = val.size () - m_views[c].size ();
|
|
|
|
m_value.replace (
|
|
|
|
m_views[c].data () - m_value.data (),
|
|
|
|
m_views[c].size (),
|
|
|
|
val
|
|
|
|
);
|
|
|
|
for (int i = c + 1; i != component::NUM_COMPONENTS; ++i)
|
|
|
|
m_views[i] += diff;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
2021-12-21 10:42:13 +11:00
|
|
|
void uri::clear_fragment ()
|
2021-12-17 11:46:59 +11:00
|
|
|
{
|
2021-12-21 10:42:13 +11:00
|
|
|
if (!m_views[FRAGMENT])
|
|
|
|
return;
|
2021-12-17 11:46:59 +11:00
|
|
|
|
|
|
|
m_value.erase (
|
2021-12-21 10:42:13 +11:00
|
|
|
m_views[FRAGMENT].begin () - m_value.data (),
|
|
|
|
m_views[FRAGMENT].size ()
|
2021-12-17 11:46:59 +11:00
|
|
|
);
|
2021-12-21 10:42:13 +11:00
|
|
|
m_views[FRAGMENT] = nullptr;
|
2021-12-17 11:46:59 +11:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
2022-01-27 14:51:40 +11:00
|
|
|
std::vector<std::pair<std::string, std::string>>
|
|
|
|
cruft::query_to_vector (std::string_view val)
|
2021-12-17 11:46:59 +11:00
|
|
|
{
|
2022-01-27 14:51:40 +11:00
|
|
|
std::vector<std::pair<std::string, std::string>> res;
|
2021-12-17 11:46:59 +11:00
|
|
|
|
|
|
|
for (auto const tok: cruft::tokeniser (val, '&')) {
|
|
|
|
auto const &[k, v] = cruft::split_on (tok, '=');
|
2022-01-27 14:51:40 +11:00
|
|
|
res.emplace_back (
|
2021-12-17 11:46:59 +11:00
|
|
|
std::string (k.begin (), k.size ()),
|
|
|
|
std::string (v.begin (), v.size ())
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
std::string
|
2022-01-27 14:51:40 +11:00
|
|
|
cruft::vector_to_query (std::vector<std::pair<std::string, std::string>> const &val)
|
2021-12-17 11:46:59 +11:00
|
|
|
{
|
2021-12-21 14:56:28 +11:00
|
|
|
// Test for empty up front so that we can simplify the string
|
|
|
|
// concatenation below.
|
|
|
|
if (val.empty ())
|
|
|
|
return "";
|
|
|
|
|
2021-12-17 11:46:59 +11:00
|
|
|
std::string res;
|
|
|
|
for (auto const &[k, v]: val) {
|
|
|
|
res += k;
|
|
|
|
res += '=';
|
|
|
|
res += v;
|
|
|
|
res += '&';
|
|
|
|
}
|
|
|
|
|
2021-12-21 14:56:28 +11:00
|
|
|
// The string must be non-zero length because we've tested for the empty
|
|
|
|
// set initially.
|
|
|
|
CHECK (!res.empty ());
|
2021-12-17 11:46:59 +11:00
|
|
|
res.resize (res.size () - 1);
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
bool
|
|
|
|
cruft::operator== (cruft::uri const &a, cruft::uri const &b) noexcept
|
|
|
|
{
|
|
|
|
return a.value () == b.value ();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2021-12-14 12:13:50 +11:00
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
static uint8_t
|
|
|
|
hex_to_uint (char c)
|
|
|
|
{
|
|
|
|
if (c >= '0' && c <= '9')
|
|
|
|
return c - '0';
|
|
|
|
|
|
|
|
if (c >= 'A' && c <= 'F')
|
|
|
|
return c - 'A' + 10;
|
|
|
|
|
|
|
|
if (c >= 'a' && c <= 'f')
|
|
|
|
return c - 'a' + 10;
|
|
|
|
|
|
|
|
unreachable ();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
std::string
|
|
|
|
cruft::uri::percent_decode (view<const char*> s)
|
|
|
|
{
|
|
|
|
if (s.size () == 0)
|
|
|
|
return std::string ();
|
|
|
|
|
|
|
|
// Early check for late percent-encoding so we can simplify the decode loop
|
|
|
|
{
|
|
|
|
auto tail = std::find (s.size () < 3 ? s.begin ()
|
|
|
|
: s.end () - 2,
|
|
|
|
s.end (),
|
|
|
|
'%');
|
|
|
|
if (tail != s.end ())
|
|
|
|
throw parse_error ("triple overlaps end");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Allocate and size a potentially overlong output string. This allows us
|
|
|
|
// to copy directly into its buffer. We'll shorten it at the end.
|
|
|
|
std::string out;
|
|
|
|
out.resize (s.size ());
|
|
|
|
|
|
|
|
// Find the percent, copy until that, decode, advance, repeat.
|
|
|
|
auto out_cursor = out.begin ();
|
|
|
|
|
|
|
|
for (auto i = s.begin (); i < s.end (); ++i) {
|
|
|
|
auto cursor = std::find (i, s.end (), '%');
|
|
|
|
|
|
|
|
if (cursor == s.end ()) {
|
|
|
|
out_cursor = std::copy (i, s.end (), out_cursor);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
out_cursor = std::copy (i, cursor, out_cursor);
|
|
|
|
*out_cursor = hex_to_uint (cursor[1]) << 4 | hex_to_uint(cursor[2]);
|
|
|
|
|
|
|
|
i += 3;
|
|
|
|
}
|
|
|
|
|
|
|
|
out.resize (out.end () - out_cursor);
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2021-12-17 11:46:59 +11:00
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
2021-12-14 12:13:50 +11:00
|
|
|
std::ostream&
|
|
|
|
cruft::operator<< (std::ostream &os, cruft::uri::component c)
|
|
|
|
{
|
|
|
|
switch (c) {
|
|
|
|
case cruft::uri::SCHEME: return os << "SCHEME";
|
|
|
|
case cruft::uri::HIERARCHICAL: return os << "HIERARCHICAL";
|
|
|
|
case cruft::uri::AUTHORITY: return os << "AUTHORITY";
|
|
|
|
case cruft::uri::USER: return os << "USER";
|
|
|
|
case cruft::uri::HOST: return os << "HOST";
|
|
|
|
case cruft::uri::PORT: return os << "PORT";
|
|
|
|
case cruft::uri::PATH: return os << "PATH";
|
|
|
|
case cruft::uri::QUERY: return os << "QUERY";
|
|
|
|
case cruft::uri::FRAGMENT: return os << "FRAGMENT";
|
|
|
|
|
|
|
|
case cruft::uri::NUM_COMPONENTS:
|
|
|
|
unreachable ();
|
|
|
|
}
|
|
|
|
|
|
|
|
unreachable ();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
std::ostream&
|
|
|
|
cruft::operator<< (std::ostream &os, cruft::uri const &val)
|
|
|
|
{
|
|
|
|
return os << val.value ();
|
|
|
|
}
|
2021-12-17 11:46:59 +11:00
|
|
|
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Merge a relative-path reference with a base path.
// RFC 3986, section 5.2.3 "Merge Paths".
//
// The result is `relative` appended to everything in `base` up to and
// including its right-most '/'. If `base` contains no '/', the whole base
// path is discarded and the result is just `relative`.
//
// The case of a base URI with an authority and an empty path is expected to
// be handled by the caller.
static std::string
merge (std::string_view base, std::string_view relative)
{
    auto const slash = base.rfind ('/');

    std::string res;

    if (slash != std::string_view::npos) {
        // Keep the base up to and including the final slash.
        auto const prefix = base.substr (0, slash + 1);
        res.reserve (prefix.size () + relative.size ());
        res.append (prefix);
    }

    res.append (relative);
    return res;
}
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
static std::string
|
|
|
|
remove_dot_segments (std::string_view path)
|
|
|
|
{
|
|
|
|
std::vector<std::string_view> src;
|
|
|
|
for (auto const &i: cruft::tokeniser (path, '/'))
|
|
|
|
src.push_back (std::string_view (i.begin (), i.size ()));
|
|
|
|
|
|
|
|
bool const absolute = !path.empty () && path[0] == '/';
|
|
|
|
bool const trailing = !src.empty () && (src.back () == "" or src.back () == "." or src.back () == "..");
|
|
|
|
|
|
|
|
std::vector<std::string_view> dst;
|
|
|
|
for (auto const &i: src) {
|
|
|
|
if (i == "..") {
|
|
|
|
if (!dst.empty ()) {
|
|
|
|
if (dst.back () == "..")
|
|
|
|
dst.push_back (i);
|
|
|
|
else
|
|
|
|
dst.pop_back ();
|
|
|
|
}
|
|
|
|
} else if (i != "." and i != "") {
|
|
|
|
dst.push_back (i);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string res = absolute ? "/" : "";
|
|
|
|
for (auto const &i: dst) {
|
|
|
|
res.append (i);
|
|
|
|
res.append ("/");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!trailing)
|
|
|
|
if (!res.empty ())
|
|
|
|
res.pop_back ();
|
|
|
|
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Uniform Resource Identifier (URI): Generic Syntax
|
|
|
|
// https://www.ietf.org/rfc/rfc3986.txt
|
|
|
|
// 5.2. Relative Resolution
|
|
|
|
// Resolve `relative` against `base` and return the target URI.
//
// * If `relative` carries a scheme it is returned unchanged.
// * Otherwise the scheme always comes from `base` and the fragment always
//   comes from `relative`; authority, path and query are selected as
//   described in the branches below.
cruft::uri
cruft::resolve (cruft::uri const &base, cruft::uri const &relative)
{
    // A reference with its own scheme is already absolute.
    if (!relative.scheme ().empty ())
        return relative;

    // Start from the base's components and override as required.
    std::string_view const scheme = base.scheme ();
    std::string_view authority    = base.authority ();
    std::string      path         = std::string (base.path ());
    std::string_view query        = base.query ();

    if (!relative.authority ().empty ()) {
        // Network-path reference: everything after the scheme is taken from
        // the reference.
        authority = relative.authority ();
        path      = remove_dot_segments (relative.path ());
        query     = relative.query ();
    } else if (relative.path ().empty ()) {
        // Same-document style reference: keep the base path, and keep the
        // base query unless the reference supplies one.
        if (!relative.query ().empty ())
            query = relative.query ();
    } else {
        if (relative.path ().starts_with ("/")) {
            // Absolute-path reference.
            path = remove_dot_segments (relative.path ());
        } else {
            // Relative-path reference: merge with the base path first.
            if (!base.authority ().empty () and base.path ().empty ())
                path = fmt::format ("/{}", relative.path ());
            else
                path = merge (base.path (), relative.path ());

            path = remove_dot_segments (path);
        }

        query = relative.query ();
    }

    std::string_view const fragment = relative.fragment ();

    return { scheme, authority, path, query, fragment };
}
|
|
|
|
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Return a copy of `src` whose path has had its dot segments removed.
cruft::uri
cruft::normalise (cruft::uri const &src)
{
    cruft::uri res = src;

    auto const cleaned = remove_dot_segments (src.path ());
    res.set (uri::component::PATH, cleaned);

    return res;
}
|
|
|
|
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
template <>
|
|
|
|
bool
|
|
|
|
cruft::debug::validator<cruft::uri>::is_valid (cruft::uri const &val) noexcept
|
|
|
|
{
|
|
|
|
auto const &value = val.value ();
|
|
|
|
auto const &components = val.components ();
|
|
|
|
|
|
|
|
// Each component should fall within the value string
|
|
|
|
for (auto const &i: components) {
|
|
|
|
RETURN_FALSE_UNLESS (i.begin () >= value.data ());
|
|
|
|
RETURN_FALSE_UNLESS (i.end () <= value.data () + value.size ());
|
|
|
|
}
|
|
|
|
|
|
|
|
// Each component reference memory after the previous component.
|
|
|
|
// Empty components can be coincident with their siblings.
|
|
|
|
for (auto const [selfidx, previdx, parentidx]: COMPONENT_ORDER) {
|
|
|
|
auto const &self = components[selfidx];
|
|
|
|
|
|
|
|
if (previdx) {
|
|
|
|
auto const &prev = components[*previdx];
|
|
|
|
|
|
|
|
RETURN_FALSE_UNLESS (prev.begin () <= self.begin ());
|
|
|
|
RETURN_FALSE_UNLESS (prev.end () <= self.end ());
|
|
|
|
RETURN_FALSE_UNLESS (prev.end () <= self.begin ());
|
|
|
|
}
|
|
|
|
|
|
|
|
if (parentidx) {
|
|
|
|
auto const &parent = components[*parentidx];
|
|
|
|
|
|
|
|
RETURN_FALSE_UNLESS (parent.begin () <= self.begin ());
|
|
|
|
RETURN_FALSE_UNLESS (parent.end () >= self.end ());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|