libcruft-util/uri.cpp

519 lines
14 KiB
C++

#include "./uri.hpp"
#include "./string.hpp"
#include "./debug/panic.hpp"
#include <ostream>
using cruft::uri;
///////////////////////////////////////////////////////////////////////////////
#include <cruft/util/debug/assert.hpp>
#include <cruft/util/debug/warn.hpp>
#include <cruft/util/iterator/zip.hpp>
#include <optional>
//-----------------------------------------------------------------------------
using uric = cruft::uri::component;
// Layout relationships between URI components.
//
// Each row names a component, the sibling that immediately precedes it (if
// any), and the component that encloses it (if any).
//
// Row order matters: the string constructor walks this table front to back
// and positions an unparsed component using the view of its `prev` or
// `parent`, so every row must appear after the rows it refers to.
//
// The table is read-only lookup data, so it is const with internal linkage.
static const struct {
    // The component this row describes.
    cruft::uri::component self;

    // The sibling directly before `self`, or empty if `self` comes first.
    std::optional<cruft::uri::component> prev;

    // The component that contains `self`, or empty for top-level components.
    std::optional<cruft::uri::component> parent;
} COMPONENT_ORDER[cruft::uri::NUM_COMPONENTS] {
    { uric::SCHEME,       { },                 { }                  },
    { uric::HIERARCHICAL, {uric::SCHEME      }, { }                  },
    { uric::QUERY,        {uric::HIERARCHICAL}, { }                  },
    { uric::FRAGMENT,     {uric::QUERY       }, { }                  },

    { uric::AUTHORITY,    { },                 {uric::HIERARCHICAL} },
    { uric::PATH,         {uric::AUTHORITY   }, {uric::HIERARCHICAL} },

    { uric::USER,         { },                 {uric::AUTHORITY}    },
    { uric::HOST,         {uric::USER        }, {uric::AUTHORITY}    },
    { uric::PORT,         {uric::HOST        }, {uric::AUTHORITY}    },
};
///////////////////////////////////////////////////////////////////////////////
// Take ownership of `_value`, parse it, and make sure every component has a
// usable (possibly zero-length) view into the stored string.
cruft::uri::uri (std::string &&_value):
    m_views {
        nullptr, nullptr, nullptr,
        nullptr, nullptr, nullptr,
        nullptr, nullptr, nullptr
    },
    m_value (std::move (_value))
{
    parse ();

    // Any view that still has null data after parsing is given a zero-length
    // placeholder. The table is ordered so that the `prev`/`parent` of a row
    // has already been settled by the time the row itself is visited.
    for (auto const &[self, prev, parent]: COMPONENT_ORDER) {
        auto &slot = m_views[self];
        if (slot.data ())
            continue;

        if (prev) {
            // Sit directly after the preceding sibling.
            slot = { m_views[*prev].end (), 0 };
        } else if (parent) {
            // First child: sit at the start of the enclosing component.
            slot = { m_views[*parent].begin (), 0 };
        } else {
            // No sibling and no parent: anchor at the start of the string.
            slot = { m_value.data (), 0 };
        }
    }

    CHECK_SANITY (*this);
}
//-----------------------------------------------------------------------------
// Construct from a C string by copying it into a std::string and delegating
// to the std::string&& constructor.
cruft::uri::uri (const char *str):
uri (std::string (str))
{ ; }
//-----------------------------------------------------------------------------
// Construct from a character view by copying the range [begin, end) into a
// std::string and delegating to the std::string&& constructor.
uri::uri (cruft::view<const char *> _value):
uri (std::string (_value.begin (), _value.end ()))
{ ; }
//-----------------------------------------------------------------------------
// Construct from an lvalue string by taking a copy and delegating to the
// std::string&& constructor.
uri::uri (const std::string &_value):
uri (std::string (_value))
{ ; }
//-----------------------------------------------------------------------------
// Copy constructor.
//
// The views are copied verbatim from `rhs`, so they initially reference
// rhs's string storage rather than our own copy.
uri::uri (uri const &rhs)
: m_views (rhs.m_views)
, m_value (rhs.m_value)
{
// Distance between the source buffer and our freshly copied buffer.
auto const offset = rhs.m_value.data () - m_value.data ();
// Apply that distance to every view so it refers to our buffer instead
// (presumably `-=` shifts both ends of the view; see the view type).
for (auto &i: m_views)
i -= offset;
CHECK_SANITY (*this);
}
//-----------------------------------------------------------------------------
// Move assignment.
//
// Moving a std::string does not guarantee that the destination ends up with
// the source's buffer: short strings stored inline are copied character by
// character. The views must therefore be rebased onto wherever our string's
// data lives after the move, in the same way the copy constructor does.
uri& uri::operator= (uri &&rhs) noexcept
{
    // Self-move would leave m_value in an unspecified state.
    if (this == &rhs)
        return *this;

    // Remember where the views currently point so we can measure how far the
    // characters travelled.
    auto const source = rhs.m_value.data ();

    m_views = std::move (rhs.m_views);
    m_value = std::move (rhs.m_value);

    // Zero when the buffer was stolen outright; non-zero when the characters
    // were copied into our own storage.
    auto const offset = source - m_value.data ();
    for (auto &i: m_views)
        i -= offset;

    return *this;
}
//-----------------------------------------------------------------------------
// Assemble a URI string from its five top-level components.
//
// Empty components are omitted together with their delimiter:
//   scheme    -> "<scheme>:"
//   authority -> "//<authority>"
//   path      -> appended verbatim (always)
//   query     -> "?<query>"
//   fragment  -> "#<fragment>"
static std::string
combine_components (
    std::string_view scheme,
    std::string_view authority,
    std::string_view path,
    std::string_view query,
    std::string_view fragment
) {
    // Upper bound on the output length: every component plus every delimiter
    // that could possibly be emitted.
    auto const capacity
        = scheme.size ()
        + strlen ("://") + authority.size ()
        + path.size ()
        + strlen ("?") + query.size ()
        + strlen ("#") + fragment.size ();

    std::string out;
    out.reserve (capacity);

    if (!scheme.empty ())
        out.append (scheme).append (":");

    if (!authority.empty ())
        out.append ("//").append (authority);

    out.append (path);

    if (!query.empty ())
        out.append ("?").append (query);

    if (!fragment.empty ())
        out.append ("#").append (fragment);

    return out;
}
//-----------------------------------------------------------------------------
// Construct from individual components: the pieces are joined into a single
// string by combine_components and that string is handed to the
// std::string&& constructor.
uri::uri (
std::string_view scheme,
std::string_view authority,
std::string_view path,
std::string_view query,
std::string_view fragment
) : uri (combine_components (scheme, authority, path, query, fragment))
{ ; }
///////////////////////////////////////////////////////////////////////////////
// Return the text of component `c` as a view into this object's storage.
// The result is only valid while this uri is alive and unmodified.
std::string_view
uri::get (component c) const&
{
    CHECK_INDEX (c, NUM_COMPONENTS);

    auto const &span = m_views[c];
    return std::string_view (span.data (), span.size ());
}
//-----------------------------------------------------------------------------
// Replace the text of component `c` with `val`.
//
// Only the characters covered by the component's view are substituted; no
// delimiters are inserted or removed.
//
// The edited string is parsed from scratch rather than patching the views in
// place. Patching cannot be done reliably here:
//   * the replaced component's own length changes,
//   * enclosing components must grow or shrink with it,
//   * nested components must not be treated as following siblings, and
//   * std::string::replace may reallocate, invalidating every view.
//
// This object is left untouched if constructing the replacement throws
// (presumably parse() rejects malformed text).
void
uri::set (component c, std::string_view val)
{
    CHECK_INDEX (c, NUM_COMPONENTS);

    // Work on a copy so that `val` may safely alias our own storage and so
    // that a failure below leaves us unmodified.
    std::string updated = m_value;
    updated.replace (
        m_views[c].data () - m_value.data (),
        m_views[c].size (),
        val
    );

    uri const next (std::move (updated));

    // Copy the string first: if it throws we have not touched the views.
    m_value = next.m_value;
    m_views = next.m_views;

    // The copied views still reference next's buffer; rebase them onto ours
    // in the same way the copy constructor does.
    auto const offset = next.m_value.data () - m_value.data ();
    for (auto &i: m_views)
        i -= offset;

    CHECK_SANITY (*this);
}
//-----------------------------------------------------------------------------
// Remove the fragment text from the URI.
//
// The fragment view is reset to a zero-length view at the position the
// fragment used to occupy. It must not be reset to nullptr: the string
// constructor guarantees every view has non-null data inside the value
// string, and the sanity validator and copy constructor rely on that.
//
// NOTE(review): only the characters covered by the fragment view are erased.
// The view appears to exclude the leading '#', since combine_components adds
// it separately, so the delimiter is left in the string. Confirm whether
// that is intended.
void uri::clear_fragment ()
{
    if (!m_views[FRAGMENT])
        return;

    auto &fragment = m_views[FRAGMENT];

    // std::string::erase never reallocates, so the remaining views (all of
    // which precede the fragment) stay valid.
    m_value.erase (
        fragment.begin () - m_value.data (),
        fragment.size ()
    );

    fragment = { fragment.begin (), 0 };
}
///////////////////////////////////////////////////////////////////////////////
// Split a query string into key/value pairs.
//
// The input is tokenised on '&' and each token is split on '=' via
// cruft::split_on. Both halves are copied into owned strings; no
// percent-decoding is performed here.
std::vector<std::pair<std::string, std::string>>
cruft::query_to_vector (std::string_view val)
{
    // Copy a character view into an owned string.
    auto const own = [] (auto const &chars) {
        return std::string (chars.begin (), chars.size ());
    };

    std::vector<std::pair<std::string, std::string>> pairs;

    for (auto const field: cruft::tokeniser (val, '&')) {
        auto const &[key, value] = cruft::split_on (field, '=');
        pairs.emplace_back (own (key), own (value));
    }

    return pairs;
}
//-----------------------------------------------------------------------------
// Join key/value pairs into a query string of the form "k1=v1&k2=v2".
//
// An empty input yields an empty string. Keys and values are emitted
// verbatim; no percent-encoding is performed here.
std::string
cruft::vector_to_query (std::vector<std::pair<std::string, std::string>> const &val)
{
    if (val.empty ())
        return "";

    std::string res;

    // Emit the separator before every pair except the first, so there is
    // never a trailing '&' to strip afterwards.
    auto cursor = val.begin ();
    res += cursor->first;
    res += '=';
    res += cursor->second;

    for (++cursor; cursor != val.end (); ++cursor) {
        res += '&';
        res += cursor->first;
        res += '=';
        res += cursor->second;
    }

    return res;
}
///////////////////////////////////////////////////////////////////////////////
// Two URIs compare equal when their full value strings are equal. The
// comparison is purely textual: this function applies no normalisation
// before comparing.
bool
cruft::operator== (cruft::uri const &a, cruft::uri const &b) noexcept
{
return a.value () == b.value ();
}
///////////////////////////////////////////////////////////////////////////////
// Convert a single hexadecimal digit (either case) to its numeric value.
//
// The caller must supply a valid hex digit: any other character reaches
// unreachable ().
static uint8_t
hex_to_uint (char c)
{
    auto const within = [c] (char lo, char hi) {
        return lo <= c && c <= hi;
    };

    if (within ('0', '9'))
        return c - '0';

    if (within ('A', 'F'))
        return c - 'A' + 10;

    if (within ('a', 'f'))
        return c - 'a' + 10;

    unreachable ();
}
//-----------------------------------------------------------------------------
// Decode "%XX" percent-encoded triples in `s`, copying all other characters
// through unchanged.
//
// Throws parse_error if a '%' appears within the final two characters, as
// the triple would run past the end of the input.
//
// NOTE: the two characters after each '%' are passed to hex_to_uint, which
// reaches unreachable () for anything that is not a hex digit.
std::string
cruft::uri::percent_decode (view<const char*> s)
{
    if (s.size () == 0)
        return std::string ();

    // Early check for late percent-encoding so we can simplify the decode
    // loop: after this, every '%' is known to have two characters after it.
    {
        auto tail = std::find (s.size () < 3 ? s.begin ()
                                             : s.end () - 2,
                               s.end (),
                               '%');
        if (tail != s.end ())
            throw parse_error ("triple overlaps end");
    }

    // Allocate and size a potentially overlong output string. This allows us
    // to copy directly into its buffer. We'll shorten it at the end.
    std::string out;
    out.resize (s.size ());

    // Find the percent, copy until that, decode, advance, repeat.
    auto out_cursor = out.begin ();

    for (auto i = s.begin (); i < s.end (); ) {
        auto const cursor = std::find (i, s.end (), '%');

        // Literal run up to the next '%' (or to the end of the input).
        out_cursor = std::copy (i, cursor, out_cursor);
        if (cursor == s.end ())
            break;

        // Write the decoded byte and step past it so the next copy does not
        // overwrite it.
        *out_cursor++ = static_cast<char> (
            hex_to_uint (cursor[1]) << 4 | hex_to_uint (cursor[2])
        );

        // Resume immediately after the triple, measured from the '%' itself
        // rather than from where this iteration started.
        i = cursor + 3;
    }

    // Trim to the number of bytes actually written.
    out.resize (static_cast<std::size_t> (out_cursor - out.begin ()));
    return out;
}
///////////////////////////////////////////////////////////////////////////////
// Stream the symbolic name of a URI component.
//
// NUM_COMPONENTS, and any value outside the enumeration, is not a real
// component and reaches unreachable ().
std::ostream&
cruft::operator<< (std::ostream &os, cruft::uri::component c)
{
    char const *label = nullptr;

    switch (c) {
    case cruft::uri::SCHEME:        label = "SCHEME";       break;
    case cruft::uri::HIERARCHICAL:  label = "HIERARCHICAL"; break;
    case cruft::uri::AUTHORITY:     label = "AUTHORITY";    break;
    case cruft::uri::USER:          label = "USER";         break;
    case cruft::uri::HOST:          label = "HOST";         break;
    case cruft::uri::PORT:          label = "PORT";         break;
    case cruft::uri::PATH:          label = "PATH";         break;
    case cruft::uri::QUERY:         label = "QUERY";        break;
    case cruft::uri::FRAGMENT:      label = "FRAGMENT";     break;

    case cruft::uri::NUM_COMPONENTS:
        break;
    }

    if (label)
        return os << label;

    unreachable ();
}
//-----------------------------------------------------------------------------
// Stream a URI by writing its full value string.
std::ostream&
cruft::operator<< (std::ostream &os, cruft::uri const &val)
{
return os << val.value ();
}
///////////////////////////////////////////////////////////////////////////////
// Merge a relative-path reference with a base path (RFC 3986, 5.2.3).
//
// The result is `relative` appended to everything in `base` up to and
// including its right-most '/'. If `base` contains no '/' at all, the whole
// base path is discarded and `relative` is returned on its own.
//
// The case of a base with an authority and an empty path is handled by the
// caller, not here.
static std::string
merge (std::string_view base, std::string_view relative)
{
    auto const slash = base.rfind ('/');
    if (slash == std::string_view::npos)
        return std::string (relative);

    // Keep the directory part of the base, including the trailing slash.
    auto const directory = base.substr (0, slash + 1);

    std::string res;
    res.reserve (directory.size () + relative.size ());
    res.append (directory);
    res.append (relative);
    return res;
}
//-----------------------------------------------------------------------------
// Collapse "." and ".." segments in a path, e.g. "/a/b/../c/./d" becomes
// "/a/c/d".
//
// * Empty segments and "." segments are dropped.
// * A ".." segment removes the most recently kept segment. If there is
//   nothing left to remove it is simply discarded, so the result never
//   climbs above the start of the path.
// * A leading '/' is preserved.
// * The result ends in '/' when the input's final segment was empty, "." or
//   "..", i.e. when the input named a directory.
static std::string
remove_dot_segments (std::string_view path)
{
    std::vector<std::string_view> src;
    for (auto const &i: cruft::tokeniser (path, '/'))
        src.push_back (std::string_view (i.begin (), i.size ()));

    bool const absolute = !path.empty () && path[0] == '/';
    bool const trailing = !src.empty () && (src.back () == "" or src.back () == "." or src.back () == "..");

    // `dst` only ever receives ordinary segments, so a ".." can always be
    // resolved by popping (or ignored when there is nothing to pop).
    std::vector<std::string_view> dst;
    dst.reserve (src.size ());

    for (auto const &i: src) {
        if (i == "..") {
            if (!dst.empty ())
                dst.pop_back ();
        } else if (i != "." and i != "") {
            dst.push_back (i);
        }
    }

    // Emit every segment followed by '/', then strip the final '/' unless the
    // input asked for a trailing one.
    std::string res = absolute ? "/" : "";
    for (auto const &i: dst) {
        res.append (i);
        res.append ("/");
    }

    if (!trailing && !res.empty ())
        res.pop_back ();

    return res;
}
//-----------------------------------------------------------------------------
// Uniform Resource Identifier (URI): Generic Syntax
// https://www.ietf.org/rfc/rfc3986.txt
// 5.2. Relative Resolution
// Resolve `relative` against `base`, following the structure of RFC 3986
// section 5.2.2.
//
// * A reference that carries its own scheme is returned unchanged.
// * Otherwise the scheme always comes from `base` and the fragment always
//   comes from `relative`.
// * If the reference has an authority, its authority, path (with dot
//   segments removed) and query are used.
// * If the reference has an empty path, the base path is kept and the base
//   query is used only when the reference has no query of its own.
// * An absolute reference path replaces the base path; a relative one is
//   merged with it. Dot segments are removed in both cases.
cruft::uri
cruft::resolve (cruft::uri const &base, cruft::uri const &relative)
{
    if (!relative.scheme ().empty ())
        return relative;

    std::string_view const scheme   = base.scheme ();
    std::string_view const fragment = relative.fragment ();

    // Defaults for the common case; the branches below override as needed.
    std::string_view authority = base.authority ();
    std::string_view query     = relative.query ();
    std::string path;

    if (!relative.authority ().empty ()) {
        authority = relative.authority ();
        path = remove_dot_segments (relative.path ());
    } else if (relative.path ().empty ()) {
        path = base.path ();
        if (relative.query ().empty ())
            query = base.query ();
    } else if (relative.path ().starts_with ("/")) {
        path = remove_dot_segments (relative.path ());
    } else {
        // RFC 3986, 5.2.3: a base with an authority but no path merges as if
        // its path were "/".
        if (!base.authority ().empty () and base.path ().empty ())
            path = fmt::format ("/{}", relative.path ());
        else
            path = merge (base.path (), relative.path ());

        path = remove_dot_segments (path);
    }

    return { scheme, authority, path, query, fragment };
}
///////////////////////////////////////////////////////////////////////////////
// Return a copy of `src` whose path has had its "." and ".." segments
// removed. No other component is altered by this function.
cruft::uri
cruft::normalise (cruft::uri const &src)
{
    auto const cleaned = remove_dot_segments (src.path ());

    auto res = src;
    res.set (uri::component::PATH, cleaned);
    return res;
}
///////////////////////////////////////////////////////////////////////////////
// Sanity validator for cruft::uri.
//
// Returns true when every component view lies inside the value string and
// the views respect the sibling/parent layout described by COMPONENT_ORDER.
template <>
bool
cruft::debug::validator<cruft::uri>::is_valid (cruft::uri const &val) noexcept
{
auto const &value = val.value ();
auto const &components = val.components ();
// Each component should fall within the value string.
for (auto const &i: components) {
RETURN_FALSE_UNLESS (i.begin () >= value.data ());
RETURN_FALSE_UNLESS (i.end () <= value.data () + value.size ());
}
// Each component must reference memory at or after the end of its
// previous sibling, and must lie within its parent.
// Empty components can be coincident with their siblings.
for (auto const [selfidx, previdx, parentidx]: COMPONENT_ORDER) {
auto const &self = components[selfidx];
if (previdx) {
auto const &prev = components[*previdx];
// The first two checks are implied by the third whenever each view
// has begin <= end; they are kept as separate checks.
RETURN_FALSE_UNLESS (prev.begin () <= self.begin ());
RETURN_FALSE_UNLESS (prev.end () <= self.end ());
RETURN_FALSE_UNLESS (prev.end () <= self.begin ());
}
if (parentidx) {
auto const &parent = components[*parentidx];
// A child must be fully contained by its parent.
RETURN_FALSE_UNLESS (parent.begin () <= self.begin ());
RETURN_FALSE_UNLESS (parent.end () >= self.end ());
}
}
return true;
}