uri: add resolve and normalise operations

This commit is contained in:
Danny Robson 2021-12-17 10:46:59 +10:00
parent 2d0324f17a
commit 458f109c6f
5 changed files with 488 additions and 20 deletions

View File

@ -71,7 +71,7 @@
path_absolute = '/' (segment_nz ('/' segment)*)?; path_absolute = '/' (segment_nz ('/' segment)*)?;
path_noscheme = segment_nz_nc ('/' segment)*; path_noscheme = segment_nz_nc ('/' segment)*;
path_rootless = segment_nz ('/' segment)*; path_rootless = segment_nz ('/' segment)*;
path_empty = '0' pchar; path_empty = zlen;
path = ( path = (
path_abempty | path_absolute | path_noscheme | path_rootless | path_empty path_abempty | path_absolute | path_noscheme | path_rootless | path_empty
@ -119,7 +119,7 @@
| path_empty >path_begin %path_end | path_empty >path_begin %path_end
) >hier_begin %hier_end; ) >hier_begin %hier_end;
uri = scheme ':' hier_part ('?' query)? ('#' fragment); uri = scheme ':' hier_part ('?' query)? ('#' fragment)?;
relative_part = relative_part =
'//' authority path_abempty >path_begin %path_end '//' authority path_abempty >path_begin %path_end
@ -128,7 +128,7 @@
| path_empty >path_begin %path_end | path_empty >path_begin %path_end
; ;
relative_ref = relative_part ('?' query)? ('#' fragment); relative_ref = relative_part ('?' query)? ('#' fragment)?;
uri_reference = uri | relative_ref; uri_reference = uri | relative_ref;

View File

@ -2,11 +2,15 @@
#include "tap.hpp" #include "tap.hpp"
int #include <fmt/ostream.h>
main (void)
{
cruft::TAP::logger tap;
#include <ostream>
///////////////////////////////////////////////////////////////////////////////
static void
test_parse (cruft::TAP::logger &tap)
{
static const struct { static const struct {
const char *src; const char *src;
@ -155,14 +159,195 @@ main (void)
} }
static const char* BAD[] = { static const char* BAD[] = {
"www.google.com.au", // "www.google.com.au",
}; };
for (auto i: BAD) for (auto i: BAD)
tap.expect_throw<cruft::uri::parse_error> ( tap.expect_throw<cruft::uri::parse_error> (
[i] (void) { cruft::uri foo (i); }, "throw parsing '{:s}'", i [i] (void) { cruft::uri foo (i); }, "throw parsing '{:s}'", i
); );
}
///////////////////////////////////////////////////////////////////////////////
/// Exercise `normalise` against a table of absolute URIs containing "." and
/// ".." path segments.
///
/// Each entry is parsed, normalised, and compared with the parsed expected
/// URI. One TAP result is emitted per entry; mismatches are additionally
/// echoed to stderr as a TAP comment so both values are visible.
static void
test_normalise (cruft::TAP::logger &tap)
{
    struct {
        char const *init;
        char const *expected;
    } TESTS[] = {
        // Disabled: scheme-less RFC 3986 examples.
        // {
        //     "/a/b/c/./../../g",
        //     "/a/g"
        // },
        // {
        //     "mid/content=5/../6",
        //     "mid/6"
        // },

        { "http://example.com/",                    "http://example.com/" },
        { "http://example.com/./",                  "http://example.com/" },
        { "http://example.com/../",                 "http://example.com/" },
        { "http://example.com/a/../b",              "http://example.com/b" },
        { "http://example.com/a/../b/",             "http://example.com/b/" },
        { "http://example.com/a/./b",               "http://example.com/a/b" },
        { "http://example.com/a/./b/",              "http://example.com/a/b/" },
        { "http://example.com/a/b/c/./d/e",         "http://example.com/a/b/c/d/e" },
        { "http://example.com/a/b/c/../d/e",        "http://example.com/a/b/d/e" },
        { "http://example.com/a/b/c/../../d/e",     "http://example.com/a/d/e" },
        { "http://example.com/a/b/c/.././../d/e",   "http://example.com/a/d/e" },
    };

    for (auto const &entry: TESTS) {
        cruft::uri source (entry.init);
        cruft::uri wanted (entry.expected);

        auto const actual = normalise (source);

        // Show both sides on failure; expect_eq only reports the input.
        if (actual != wanted)
            fmt::print (stderr, "# '{}' != '{}'\n", actual, wanted);

        tap.expect_eq (actual, wanted, "normalise('{}')", entry.init);
    }
}
///////////////////////////////////////////////////////////////////////////////
/// Run the "normal examples" of RFC 3986 section 5.4.1 through `resolve`.
///
/// Every relative reference is resolved against the RFC's base URI
/// "http://a/b/c/d;p?q" and compared with the documented result. One TAP
/// result is emitted per entry; mismatches are also echoed to stderr as a
/// TAP comment in "actual != expected" order.
static void
test_rfc_resolve (cruft::TAP::logger &tap)
{
    static constexpr char const *BASE = "http://a/b/c/d;p?q";

    struct {
        char const *relative;
        char const *resolved;
    } TESTS[] = {
        { "g:h",        "g:h" },
        { "g",          "http://a/b/c/g" },
        { "./g",        "http://a/b/c/g" },
        { "g/",         "http://a/b/c/g/" },
        { "/g",         "http://a/g" },
        { "//g",        "http://g" },
        { "?y",         "http://a/b/c/d;p?y" },
        { "g?y",        "http://a/b/c/g?y" },
        { "#s",         "http://a/b/c/d;p?q#s" },
        { "g#s",        "http://a/b/c/g#s" },
        { "g?y#s",      "http://a/b/c/g?y#s" },
        { ";x",         "http://a/b/c/;x" },
        { "g;x",        "http://a/b/c/g;x" },
        { "g;x?y#s",    "http://a/b/c/g;x?y#s" },
        { "",           "http://a/b/c/d;p?q" },
        { ".",          "http://a/b/c/" },
        { "./",         "http://a/b/c/" },
        { "..",         "http://a/b/" },
        { "../",        "http://a/b/" },
        { "../g",       "http://a/b/g" },
        { "../..",      "http://a/" },
        { "../../",     "http://a/" },
        { "../../g",    "http://a/g" },
    };

    cruft::uri const base (BASE);

    for (auto const [relative, expected]: TESTS) {
        cruft::uri const relative_obj (relative);
        cruft::uri const expected_obj (expected);

        // Resolve the already-parsed reference rather than letting the raw
        // string be implicitly converted (and parsed) a second time.
        cruft::uri const resolved_obj = resolve (base, relative_obj);

        if (resolved_obj != expected_obj)
            fmt::print (stderr, "# '{}' != '{}'\n", resolved_obj, expected_obj);

        tap.expect_eq (
            resolved_obj,
            expected_obj,
            "resolve '{}', '{}'",
            base, relative
        );
    }
}
///////////////////////////////////////////////////////////////////////////////
/// Additional `resolve` cases that are not covered by the RFC 3986 table:
/// dot references resolved against a base that has an authority but an
/// empty path.
///
/// Has internal linkage, like the other test helpers in this file.
static void
test_resolve (cruft::TAP::logger &tap)
{
    struct {
        char const *base;
        char const *relative;
        char const *expected;
    } TESTS[] = {
        {
            "http://example.com",
            ".",
            "http://example.com/",
        },
        {
            "http://example.com",
            "./",
            "http://example.com/",
        },
    };

    for (auto const [base, relative, expected]: TESTS) {
        cruft::uri const base_obj (base);
        cruft::uri const relative_obj (relative);
        cruft::uri const expected_obj (expected);

        // Resolve once and test that result, instead of resolving twice and
        // discarding the first value.
        cruft::uri const computed_obj = resolve (base_obj, relative_obj);

        tap.expect_eq (
            computed_obj,
            expected_obj,
            "resolve '{}', '{}'",
            base, relative
        );
    }
}
///////////////////////////////////////////////////////////////////////////////
int
main (void)
{
cruft::TAP::logger tap;
test_parse (tap);
test_normalise (tap);
test_rfc_resolve (tap);
test_resolve (tap);
return tap.status (); return tap.status ();
} }

271
uri.cpp
View File

@ -1,7 +1,11 @@
#include "./uri.hpp" #include "./uri.hpp"
#include "./string.hpp"
#include "./debug/panic.hpp" #include "./debug/panic.hpp"
#include <ostream>
using cruft::uri; using cruft::uri;
@ -43,6 +47,145 @@ uri& uri::operator= (uri &&rhs) noexcept
} }
//-----------------------------------------------------------------------------
/// Join the five generic URI components into a single string.
///
/// An empty component is treated as absent and its delimiter is omitted:
///   * scheme    is followed by ":"
///   * authority is preceded by "//"
///   * path      is always appended verbatim
///   * query     is preceded by "?"
///   * fragment  is preceded by "#"
///
/// \return the recomposed URI text
static std::string
combine_components (
    std::string_view scheme,
    std::string_view authority,
    std::string_view path,
    std::string_view query,
    std::string_view fragment
) {
    // Upper bound on the output length: every component plus the widest
    // delimiters that could accompany it ("://", "?" and "#").
    auto const capacity =
        scheme.size ()    + 3 +
        authority.size () +
        path.size ()      +
        query.size ()     + 1 +
        fragment.size ()  + 1;

    std::string joined;
    joined.reserve (capacity);

    if (!scheme.empty ())
        joined.append (scheme).append (":");

    if (!authority.empty ())
        joined.append ("//").append (authority);

    joined.append (path);

    if (!query.empty ())
        joined.append ("?").append (query);

    if (!fragment.empty ())
        joined.append ("#").append (fragment);

    return joined;
}
//-----------------------------------------------------------------------------
// Construct a URI from its individual components.
//
// The components are first joined into one string by combine_components
// (empty components and their delimiters are omitted), and construction is
// then delegated to another uri constructor that receives the joined text.
uri::uri (
    std::string_view scheme,
    std::string_view authority,
    std::string_view path,
    std::string_view query,
    std::string_view fragment
) : uri (combine_components (scheme, authority, path, query, fragment))
{ ; }
///////////////////////////////////////////////////////////////////////////////
/// Return the text of component `c` as a std::string_view built from the
/// stored view's data pointer and size.
///
/// `c` is validated against NUM_COMPONENTS with CHECK_INDEX.
std::string_view
uri::get (component c) const&
{
    CHECK_INDEX (c, NUM_COMPONENTS);

    auto const &stored = m_views[c];
    return std::string_view (stored.data (), stored.size ());
}
//-----------------------------------------------------------------------------
/// Replace the text of component `c` with `val`, keeping the component
/// views consistent with the modified backing string.
///
/// The views are pointers into m_value, and std::string::replace may
/// reallocate that storage. Every view is therefore recorded as an
/// (offset, length) pair before the edit and rebuilt against the new buffer
/// afterwards. As before, components with an index greater than `c` are
/// shifted by the change in length; additionally the view for `c` itself
/// now takes the length of `val`.
///
/// Views whose data pointer is null are left untouched.
///
/// NOTE(review): `c` is expected to refer to a component that is present in
/// the URI; an absent (null) component has no meaningful position to write
/// to, exactly as in the previous implementation.
///
/// NOTE(review): components with a lower index than `c` keep their old
/// offset and length. If any of them is meant to enclose `c`, its length
/// would also need adjusting -- confirm against the component enum.
void
uri::set (component c, std::string_view val)
{
    CHECK_INDEX (c, NUM_COMPONENTS);

    using offset_t = std::string::difference_type;
    using length_t = std::string::size_type;

    char const *const old_base = m_value.data ();

    // Snapshot all views as buffer-relative positions while the pointers
    // are still valid.
    offset_t offsets[NUM_COMPONENTS];
    length_t lengths[NUM_COMPONENTS];
    bool     present[NUM_COMPONENTS];

    for (int i = 0; i != NUM_COMPONENTS; ++i) {
        present[i] = m_views[i].data () != nullptr;
        offsets[i] = present[i] ? m_views[i].data () - old_base : 0;
        lengths[i] = m_views[i].size ();
    }

    auto const target_offset = m_views[c].data () - old_base;
    auto const target_length = lengths[c];

    // Signed arithmetic: the replacement may be shorter than the original.
    auto const delta =
        static_cast<offset_t> (val.size ()) -
        static_cast<offset_t> (target_length);

    m_value.replace (target_offset, target_length, val);

    lengths[c] = val.size ();
    for (int i = c + 1; i != NUM_COMPONENTS; ++i)
        offsets[i] += delta;

    // Rebuild the views against the (possibly reallocated) buffer.
    char const *const new_base = m_value.data ();
    for (int i = 0; i != NUM_COMPONENTS; ++i) {
        if (!present[i])
            continue;

        char const *const first = new_base + offsets[i];
        m_views[i] = { first, first + lengths[i] };
    }
}
//-----------------------------------------------------------------------------
/// Remove the text of component `c` from the backing string.
///
/// Every view with an index greater than `c` is moved back by the removed
/// length, the component's characters are erased from m_value, and the
/// component's own view is reset to null.
void uri::clear (component const c)
{
    auto &target = m_views[c];
    auto const removed = target.size ();

    // Later components slide back by the number of characters removed.
    for (int i = c + 1; i < component::NUM_COMPONENTS; ++i)
        m_views[i] -= removed;

    auto const start = target.begin () - m_value.data ();
    m_value.erase (start, target.size ());

    target = nullptr;
}
///////////////////////////////////////////////////////////////////////////////
/// Convert a query string into a key/value map.
///
/// `val` is tokenised on '&' and each token is divided into a key and a
/// value by cruft::split_on with '='. Because std::map::emplace does not
/// overwrite, the first occurrence of a repeated key is the one retained.
///
/// NOTE(review): the handling of tokens without '=' is decided by
/// cruft::split_on, which is not visible here.
std::map<std::string, std::string>
cruft::query_to_map (std::string_view val)
{
    std::map<std::string, std::string> pairs;

    for (auto const field: cruft::tokeniser (val, '&')) {
        auto const &[name, data] = cruft::split_on (field, '=');

        std::string key   (name.begin (), name.size ());
        std::string value (data.begin (), data.size ());

        pairs.emplace (std::move (key), std::move (value));
    }

    return pairs;
}
//-----------------------------------------------------------------------------
/// Serialise a key/value map as a query string of the form
/// "k1=v1&k2=v2", in the map's iteration order.
///
/// An empty map yields an empty string. Keys and values are appended
/// verbatim; no escaping is performed here.
std::string
cruft::map_to_query (std::map<std::string, std::string> const &val)
{
    std::string res;

    for (auto const &[k, v]: val) {
        // Emit the separator before every pair except the first, so there
        // is never a trailing '&' to strip (which underflowed the length
        // when the map was empty).
        if (!res.empty ())
            res += '&';

        res += k;
        res += '=';
        res += v;
    }

    return res;
}
///////////////////////////////////////////////////////////////////////////////
// Two URIs compare equal when their complete stored strings, as returned by
// value (), are identical. The strings are compared as-is; nothing is
// normalised before the comparison.
bool
cruft::operator== (cruft::uri const &a, cruft::uri const &b) noexcept
{
    return a.value () == b.value ();
}
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
static uint8_t static uint8_t
hex_to_uint (char c) hex_to_uint (char c)
@ -105,7 +248,7 @@ cruft::uri::percent_decode (view<const char*> s)
//----------------------------------------------------------------------------- ///////////////////////////////////////////////////////////////////////////////
std::ostream& std::ostream&
cruft::operator<< (std::ostream &os, cruft::uri::component c) cruft::operator<< (std::ostream &os, cruft::uri::component c)
{ {
@ -134,3 +277,129 @@ cruft::operator<< (std::ostream &os, cruft::uri const &val)
{ {
return os << val.value (); return os << val.value ();
} }
///////////////////////////////////////////////////////////////////////////////
/// Merge a relative-path reference with a base path, following the second
/// rule of RFC 3986 section 5.2.3.
///
/// The result is `relative` appended to everything in `base` up to and
/// including its right-most '/'. If `base` contains no '/' at all, the
/// whole base path is discarded and `relative` is returned unchanged.
///
/// The first rule of 5.2.3 (base has an authority and an empty path) is
/// expected to be handled by the caller.
///
/// \param base      path component of the base URI
/// \param relative  path component of the reference
/// \return the merged, not yet dot-normalised, path
static std::string
merge (std::string_view base, std::string_view relative)
{
    auto const slash = base.rfind ('/');
    if (slash == std::string_view::npos)
        return std::string (relative);

    std::string res;
    res.reserve (slash + 1 + relative.size ());
    res.append (base.substr (0, slash + 1));
    res.append (relative);
    return res;
}
//-----------------------------------------------------------------------------
/// Collapse "." and ".." segments in a URI path.
///
/// The path is tokenised on '/' and processed segment by segment:
///   * ".."  discards the most recently kept segment, if there is one
///   * "."   and empty segments are dropped
///   * anything else is kept
///
/// A leading '/' is preserved. The result ends in '/' when the final
/// segment of the input was empty, "." or ".." (ie, the input named a
/// directory rather than a leaf).
///
/// \param path  the path component to normalise
/// \return the path with dot segments removed
static std::string
remove_dot_segments (std::string_view path)
{
    bool const absolute = !path.empty () && path.front () == '/';

    std::vector<std::string_view> kept;

    // Tracks whether the most recent segment was directory-like; after the
    // loop this describes the final segment of the input.
    bool trailing = false;

    for (auto const &token: cruft::tokeniser (path, '/')) {
        std::string_view const segment (token.begin (), token.size ());

        trailing = segment.empty () or segment == "." or segment == "..";

        if (segment == "..") {
            // ".." is never stored in `kept`, so the last entry is always
            // an ordinary segment that can simply be discarded.
            if (!kept.empty ())
                kept.pop_back ();
        } else if (segment != "." and !segment.empty ()) {
            kept.push_back (segment);
        }
    }

    std::string res = absolute ? "/" : "";
    for (auto const &segment: kept) {
        res.append (segment);
        res.append ("/");
    }

    // Each kept segment was written with a following '/'; strip the last
    // one unless the input itself ended in a directory-like segment.
    if (!trailing and !res.empty ())
        res.pop_back ();

    return res;
}
//-----------------------------------------------------------------------------
// Uniform Resource Identifier (URI): Generic Syntax
// https://www.ietf.org/rfc/rfc3986.txt
// 5.2. Relative Resolution
/// Resolve the reference `relative` against `base` and return the target
/// URI, following the structure of the algorithm in RFC 3986 section 5.2.2.
///
/// A component is treated as undefined when it is empty. In order:
///   * a reference with a scheme is returned unchanged
///   * a reference with an authority keeps its own authority, path (with
///     dot segments removed) and query
///   * a reference with an empty path takes the base path, and the base
///     query unless it supplies its own
///   * a reference with an absolute path keeps that path (dot segments
///     removed) and its own query
///   * otherwise the reference path is merged with the base path, dot
///     segments are removed, and the reference query is used
///
/// The scheme always comes from `base` in the non-trivial cases, and the
/// fragment always comes from `relative`.
///
/// NOTE(review): RFC 3986 also applies remove_dot_segments to the path of a
/// reference that carries its own scheme; such references are returned
/// verbatim here.
cruft::uri
cruft::resolve (cruft::uri const &base, cruft::uri const &relative)
{
    if (!relative.scheme ().empty ())
        return relative;

    std::string_view const scheme    = base.scheme ();
    std::string_view       authority = base.authority ();
    std::string_view       query     = relative.query ();
    std::string_view const fragment  = relative.fragment ();
    std::string            path;

    if (!relative.authority ().empty ()) {
        authority = relative.authority ();
        path = remove_dot_segments (relative.path ());
    } else if (relative.path ().empty ()) {
        path = std::string (base.path ());

        // Only an entirely query-less reference inherits the base query.
        if (relative.query ().empty ())
            query = base.query ();
    } else if (relative.path ().starts_with ("/")) {
        path = remove_dot_segments (relative.path ());
    } else {
        // RFC 3986 5.2.3: a base with an authority and no path merges as
        // though its path were "/".
        if (!base.authority ().empty () and base.path ().empty ())
            path = fmt::format ("/{}", relative.path ());
        else
            path = merge (base.path (), relative.path ());

        path = remove_dot_segments (path);
    }

    return { scheme, authority, path, query, fragment };
}
///////////////////////////////////////////////////////////////////////////////
/// Return a copy of `src` whose path has had its "." and ".." segments
/// collapsed by remove_dot_segments. The path is replaced through
/// uri::set; no other component is passed to it.
cruft::uri
cruft::normalise (cruft::uri const &src)
{
    cruft::uri cleaned = src;

    auto const path = remove_dot_segments (src.path ());
    cleaned.set (uri::component::PATH, path);

    return cleaned;
}

View File

@ -50,6 +50,7 @@ using cruft::uri;
action query_begin { m_views[QUERY] = { p, p}; } action query_begin { m_views[QUERY] = { p, p}; }
action query_end { m_views[QUERY] = { m_views[QUERY].begin (), p }; } action query_end { m_views[QUERY] = { m_views[QUERY].begin (), p }; }
action fragment_begin { m_views[FRAGMENT] = { p, p}; } action fragment_begin { m_views[FRAGMENT] = { p, p}; }
action fragment_end { m_views[FRAGMENT] = { m_views[FRAGMENT].begin (), p }; } action fragment_end { m_views[FRAGMENT] = { m_views[FRAGMENT].begin (), p }; }

33
uri.hpp
View File

@ -3,7 +3,7 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this * License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. * file, You can obtain one at http://mozilla.org/MPL/2.0/.
* *
* Copyright 2015, 2017 Danny Robson <danny@nerdcruft.net> * Copyright 2015, 2017, 2021 Danny Robson <danny@nerdcruft.net>
*/ */
#pragma once #pragma once
@ -12,9 +12,11 @@
#include "view.hpp" #include "view.hpp"
#include <array> #include <array>
#include <string>
#include <stdexcept>
#include <iosfwd> #include <iosfwd>
#include <map>
#include <stdexcept>
#include <string>
#include <string_view>
namespace cruft { namespace cruft {
@ -42,6 +44,14 @@ namespace cruft {
uri (const char *); uri (const char *);
uri (view<const char *>); uri (view<const char *>);
uri (
std::string_view scheme,
std::string_view authority,
std::string_view path,
std::string_view query,
std::string_view fragment
);
class parse_error : public std::runtime_error class parse_error : public std::runtime_error
{ using runtime_error::runtime_error; }; { using runtime_error::runtime_error; };
@ -73,11 +83,9 @@ namespace cruft {
}; };
std::string_view std::string_view
get (component c) const& get (component c) const&;
{
CHECK_INDEX (c, NUM_COMPONENTS); void set (component c, std::string_view);
return { m_views[c].data (), m_views[c].size () };
}
std::string_view all (void) const& { return m_value; } std::string_view all (void) const& { return m_value; }
std::string const& value (void) const& { return m_value; } std::string const& value (void) const& { return m_value; }
@ -103,8 +111,13 @@ namespace cruft {
std::string m_value; std::string m_value;
}; };
cruft::uri resolve (cruft::uri base, cruft::uri child); std::map<std::string, std::string> query_to_map (std::string_view);
cruft::uri normalise (cruft::uri); std::string map_to_query (std::map<std::string, std::string> const&);
bool operator== (uri const&, uri const&) noexcept;
cruft::uri resolve (cruft::uri const &base, cruft::uri const &child);
cruft::uri normalise (cruft::uri const &);
std::ostream& operator<< (std::ostream&, uri const&); std::ostream& operator<< (std::ostream&, uri const&);
std::ostream& operator<< (std::ostream&, uri::component); std::ostream& operator<< (std::ostream&, uri::component);