uri: add resolve and normalise operations
This commit is contained in:
parent
2d0324f17a
commit
458f109c6f
@ -71,7 +71,7 @@
|
||||
path_absolute = '/' (segment_nz ('/' segment)*)?;
|
||||
path_noscheme = segment_nz_nc ('/' segment)*;
|
||||
path_rootless = segment_nz ('/' segment)*;
|
||||
path_empty = '0' pchar;
|
||||
path_empty = zlen;
|
||||
|
||||
path = (
|
||||
path_abempty | path_absolute | path_noscheme | path_rootless | path_empty
|
||||
@ -119,7 +119,7 @@
|
||||
| path_empty >path_begin %path_end
|
||||
) >hier_begin %hier_end;
|
||||
|
||||
uri = scheme ':' hier_part ('?' query)? ('#' fragment);
|
||||
uri = scheme ':' hier_part ('?' query)? ('#' fragment)?;
|
||||
|
||||
relative_part =
|
||||
'//' authority path_abempty >path_begin %path_end
|
||||
@ -128,7 +128,7 @@
|
||||
| path_empty >path_begin %path_end
|
||||
;
|
||||
|
||||
relative_ref = relative_part ('?' query)? ('#' fragment);
|
||||
relative_ref = relative_part ('?' query)? ('#' fragment)?;
|
||||
|
||||
uri_reference = uri | relative_ref;
|
||||
|
||||
|
195
test/uri.cpp
195
test/uri.cpp
@ -2,11 +2,15 @@
|
||||
|
||||
#include "tap.hpp"
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
cruft::TAP::logger tap;
|
||||
#include <fmt/ostream.h>
|
||||
|
||||
#include <ostream>
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
static void
|
||||
test_parse (cruft::TAP::logger &tap)
|
||||
{
|
||||
static const struct {
|
||||
const char *src;
|
||||
|
||||
@ -155,14 +159,195 @@ main (void)
|
||||
}
|
||||
|
||||
static const char* BAD[] = {
|
||||
"www.google.com.au",
|
||||
// "www.google.com.au",
|
||||
};
|
||||
|
||||
for (auto i: BAD)
|
||||
tap.expect_throw<cruft::uri::parse_error> (
|
||||
[i] (void) { cruft::uri foo (i); }, "throw parsing '{:s}'", i
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
static void
|
||||
test_normalise (cruft::TAP::logger &tap)
|
||||
{
|
||||
struct {
|
||||
char const *init;
|
||||
char const *expected;
|
||||
} TESTS[] = {
|
||||
// {
|
||||
// // RFC 3986 example
|
||||
// "/a/b/c/./../../g",
|
||||
// "/a/g"
|
||||
// },
|
||||
// {
|
||||
// // RFC 3986 example
|
||||
// "mid/content=5/../6",
|
||||
// "mid/6"
|
||||
// },
|
||||
{
|
||||
"http://example.com/",
|
||||
"http://example.com/",
|
||||
},
|
||||
{
|
||||
"http://example.com/./",
|
||||
"http://example.com/",
|
||||
},
|
||||
{
|
||||
"http://example.com/../",
|
||||
"http://example.com/",
|
||||
},
|
||||
{
|
||||
"http://example.com/a/../b",
|
||||
"http://example.com/b",
|
||||
},
|
||||
{
|
||||
"http://example.com/a/../b/",
|
||||
"http://example.com/b/",
|
||||
},
|
||||
{
|
||||
"http://example.com/a/./b",
|
||||
"http://example.com/a/b",
|
||||
},
|
||||
{
|
||||
"http://example.com/a/./b/",
|
||||
"http://example.com/a/b/",
|
||||
},
|
||||
{
|
||||
"http://example.com/a/b/c/./d/e",
|
||||
"http://example.com/a/b/c/d/e",
|
||||
},
|
||||
{
|
||||
"http://example.com/a/b/c/../d/e",
|
||||
"http://example.com/a/b/d/e",
|
||||
},
|
||||
{
|
||||
"http://example.com/a/b/c/../../d/e",
|
||||
"http://example.com/a/d/e",
|
||||
},
|
||||
{
|
||||
"http://example.com/a/b/c/.././../d/e",
|
||||
"http://example.com/a/d/e",
|
||||
},
|
||||
};
|
||||
|
||||
for (auto const [init, expected]: TESTS) {
|
||||
cruft::uri init_obj (init);
|
||||
cruft::uri expected_obj (expected);
|
||||
auto const res = normalise (init_obj);
|
||||
|
||||
if (res != expected_obj)
|
||||
fmt::print (stderr, "# '{}' != '{}'\n", res, expected_obj);
|
||||
|
||||
tap.expect_eq (res, expected_obj, "normalise('{}')", init);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
static void
|
||||
test_rfc_resolve (cruft::TAP::logger &tap)
|
||||
{
|
||||
static constexpr char const *BASE = "http://a/b/c/d;p?q";
|
||||
|
||||
struct {
|
||||
char const *relative;
|
||||
char const *resolved;
|
||||
} TESTS[] = {
|
||||
{ "g:h", "g:h" },
|
||||
{ "g", "http://a/b/c/g" },
|
||||
{ "./g", "http://a/b/c/g" },
|
||||
{ "g/", "http://a/b/c/g/" },
|
||||
{ "/g", "http://a/g" },
|
||||
{ "//g", "http://g" },
|
||||
{ "?y", "http://a/b/c/d;p?y" },
|
||||
{ "g?y", "http://a/b/c/g?y" },
|
||||
{ "#s", "http://a/b/c/d;p?q#s" },
|
||||
{ "g#s", "http://a/b/c/g#s" },
|
||||
{ "g?y#s", "http://a/b/c/g?y#s" },
|
||||
{ ";x", "http://a/b/c/;x" },
|
||||
{ "g;x", "http://a/b/c/g;x" },
|
||||
{ "g;x?y#s", "http://a/b/c/g;x?y#s" },
|
||||
{ "", "http://a/b/c/d;p?q" },
|
||||
{ ".", "http://a/b/c/" },
|
||||
{ "./", "http://a/b/c/" },
|
||||
{ "..", "http://a/b/" },
|
||||
{ "../", "http://a/b/" },
|
||||
{ "../g", "http://a/b/g" },
|
||||
{ "../..", "http://a/" },
|
||||
{ "../../", "http://a/" },
|
||||
{ "../../g", "http://a/g" },
|
||||
};
|
||||
|
||||
cruft::uri const base (BASE);
|
||||
|
||||
for (auto const [relative, expected]: TESTS) {
|
||||
cruft::uri const relative_obj (relative);
|
||||
cruft::uri const expected_obj (expected);
|
||||
cruft::uri const resolved_obj = resolve (base, relative);
|
||||
|
||||
if (resolved_obj != expected_obj)
|
||||
fmt::print (stderr, "# '{}' != '{}'\n", expected_obj, resolved_obj);
|
||||
|
||||
tap.expect_eq (
|
||||
resolved_obj,
|
||||
expected_obj,
|
||||
"resolve '{}', '{}'",
|
||||
base, relative
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
void
|
||||
test_resolve (cruft::TAP::logger &tap)
|
||||
{
|
||||
struct {
|
||||
char const *base;
|
||||
char const *relative;
|
||||
char const *expected;
|
||||
} TESTS[] = {
|
||||
{
|
||||
"http://example.com",
|
||||
".",
|
||||
"http://example.com/",
|
||||
},
|
||||
{
|
||||
"http://example.com",
|
||||
"./",
|
||||
"http://example.com/",
|
||||
},
|
||||
};
|
||||
|
||||
for (auto const [base, relative, expected]: TESTS) {
|
||||
cruft::uri base_obj (base);
|
||||
cruft::uri relative_obj (relative);
|
||||
cruft::uri expected_obj (expected);
|
||||
cruft::uri computed_obj = resolve (base_obj, relative_obj);
|
||||
|
||||
tap.expect_eq (
|
||||
resolve (base_obj, relative_obj),
|
||||
expected_obj,
|
||||
"resolve '{}', '{}'",
|
||||
base, relative
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
cruft::TAP::logger tap;
|
||||
|
||||
test_parse (tap);
|
||||
test_normalise (tap);
|
||||
test_rfc_resolve (tap);
|
||||
test_resolve (tap);
|
||||
|
||||
return tap.status ();
|
||||
}
|
||||
|
271
uri.cpp
271
uri.cpp
@ -1,7 +1,11 @@
|
||||
#include "./uri.hpp"
|
||||
|
||||
#include "./string.hpp"
|
||||
|
||||
#include "./debug/panic.hpp"
|
||||
|
||||
#include <ostream>
|
||||
|
||||
using cruft::uri;
|
||||
|
||||
|
||||
@ -43,6 +47,145 @@ uri& uri::operator= (uri &&rhs) noexcept
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Recompose a URI string from its five top-level components.
//
// An empty component is omitted together with its delimiter: "scheme:",
// "//authority", "?query" and "#fragment" are only emitted when the
// respective component is non-empty. The path is always appended verbatim.
static std::string
combine_components (
    std::string_view scheme,
    std::string_view authority,
    std::string_view path,
    std::string_view query,
    std::string_view fragment
) {
    // Upper bound on the output length, assuming every delimiter is used.
    auto const capacity =
        scheme.size ()    + strlen ("://") +
        authority.size () +
        path.size ()      +
        query.size ()     + strlen ("?") +
        fragment.size ()  + strlen ("#");

    std::string out;
    out.reserve (capacity);

    if (!scheme.empty ())
        out.append (scheme).append (":");

    if (!authority.empty ())
        out.append ("//").append (authority);

    out.append (path);

    if (!query.empty ())
        out.append ("?").append (query);

    if (!fragment.empty ())
        out.append ("#").append (fragment);

    return out;
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Construct a URI from separate components.
//
// The components are recomposed into a single string by
// `combine_components` and construction is delegated to the
// single-argument constructor that accepts that string.
uri::uri (
    std::string_view scheme,
    std::string_view authority,
    std::string_view path,
    std::string_view query,
    std::string_view fragment
)
    : uri (combine_components (scheme, authority, path, query, fragment))
{ }
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
std::string_view
|
||||
uri::get (component c) const&
|
||||
{
|
||||
CHECK_INDEX (c, NUM_COMPONENTS);
|
||||
return { m_views[c].data (), m_views[c].size () };
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
void
|
||||
uri::set (component c, std::string_view val)
|
||||
{
|
||||
auto const diff = val.size () - m_views[c].size ();
|
||||
m_value.replace (
|
||||
m_views[c].data () - m_value.data (),
|
||||
m_views[c].size (),
|
||||
val
|
||||
);
|
||||
for (int i = c + 1; i != component::NUM_COMPONENTS; ++i)
|
||||
m_views[i] += diff;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Remove the text of component `c` from the backing string.
//
// Views with a higher component index are moved back by the length of the
// removed text, and the view for `c` is reset to null.
void uri::clear (component const c)
{
    auto const removed = m_views[c].size ();

    // Shift the later components first; the view for `c` is still needed
    // below to locate the text being erased.
    for (int later = c + 1; later < component::NUM_COMPONENTS; ++later)
        m_views[later] -= removed;

    auto const start  = m_views[c].begin () - m_value.data ();
    auto const length = m_views[c].size ();
    m_value.erase (start, length);

    m_views[c] = nullptr;
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
std::map<std::string, std::string>
|
||||
cruft::query_to_map (std::string_view val)
|
||||
{
|
||||
std::map<std::string, std::string> res;
|
||||
|
||||
for (auto const tok: cruft::tokeniser (val, '&')) {
|
||||
auto const &[k, v] = cruft::split_on (tok, '=');
|
||||
res.emplace (
|
||||
std::string (k.begin (), k.size ()),
|
||||
std::string (v.begin (), v.size ())
|
||||
);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
std::string
|
||||
cruft::map_to_query (std::map<std::string, std::string> const &val)
|
||||
{
|
||||
std::string res;
|
||||
for (auto const &[k, v]: val) {
|
||||
res += k;
|
||||
res += '=';
|
||||
res += v;
|
||||
res += '&';
|
||||
}
|
||||
|
||||
res.resize (res.size () - 1);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
bool
|
||||
cruft::operator== (cruft::uri const &a, cruft::uri const &b) noexcept
|
||||
{
|
||||
return a.value () == b.value ();
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
static uint8_t
|
||||
hex_to_uint (char c)
|
||||
@ -105,7 +248,7 @@ cruft::uri::percent_decode (view<const char*> s)
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
std::ostream&
|
||||
cruft::operator<< (std::ostream &os, cruft::uri::component c)
|
||||
{
|
||||
@ -134,3 +277,129 @@ cruft::operator<< (std::ostream &os, cruft::uri const &val)
|
||||
{
|
||||
return os << val.value ();
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Merge a relative-path reference with a base path (RFC 3986, 5.2.3).
//
// The result is `relative` appended to everything in `base` up to and
// including its right-most '/'. If `base` contains no '/' at all the whole
// base path is discarded and `relative` is returned on its own.
//
// The case of a base URI with an authority and an empty path is not handled
// here; the caller is expected to deal with it before calling.
static std::string
merge (std::string_view base, std::string_view relative)
{
    auto const slash = base.rfind ('/');
    if (slash == std::string_view::npos)
        return std::string (relative);

    // Keep the directory part of the base, including the trailing '/'.
    auto const directory = base.substr (0, slash + 1);

    std::string res;
    res.reserve (directory.size () + relative.size ());
    res.append (directory);
    res.append (relative);
    return res;
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
static std::string
|
||||
remove_dot_segments (std::string_view path)
|
||||
{
|
||||
std::vector<std::string_view> src;
|
||||
for (auto const &i: cruft::tokeniser (path, '/'))
|
||||
src.push_back (std::string_view (i.begin (), i.size ()));
|
||||
|
||||
bool const absolute = !path.empty () && path[0] == '/';
|
||||
bool const trailing = !src.empty () && (src.back () == "" or src.back () == "." or src.back () == "..");
|
||||
|
||||
std::vector<std::string_view> dst;
|
||||
for (auto const &i: src) {
|
||||
if (i == "..") {
|
||||
if (!dst.empty ()) {
|
||||
if (dst.back () == "..")
|
||||
dst.push_back (i);
|
||||
else
|
||||
dst.pop_back ();
|
||||
}
|
||||
} else if (i != "." and i != "") {
|
||||
dst.push_back (i);
|
||||
}
|
||||
}
|
||||
|
||||
std::string res = absolute ? "/" : "";
|
||||
for (auto const &i: dst) {
|
||||
res.append (i);
|
||||
res.append ("/");
|
||||
}
|
||||
|
||||
if (!trailing)
|
||||
if (!res.empty ())
|
||||
res.pop_back ();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Uniform Resource Identifier (URI): Generic Syntax
|
||||
// https://www.ietf.org/rfc/rfc3986.txt
|
||||
// 5.2. Relative Resolution
|
||||
// Resolve `relative` against `base` and return the target URI.
//
// * A reference that carries its own scheme is returned unchanged (its dot
//   segments are not removed).
// * Otherwise the scheme is taken from `base`, and:
//   - a reference with an authority supplies the authority, path and query;
//   - a reference with an empty path keeps the base path, and the base
//     query unless the reference has a non-empty query of its own;
//   - a reference path starting with '/' replaces the base path;
//   - any other reference path is merged with the base path.
//   Paths taken from the reference, or merged, have dot segments removed.
// * The fragment always comes from `relative`.
cruft::uri
cruft::resolve (cruft::uri const &base, cruft::uri const &relative)
{
    if (!relative.scheme ().empty ())
        return relative;

    std::string_view const scheme    = base.scheme ();
    std::string_view       authority = base.authority ();
    std::string_view       query;
    std::string            path;

    if (!relative.authority ().empty ()) {
        authority = relative.authority ();
        path      = remove_dot_segments (relative.path ());
        query     = relative.query ();
    } else if (relative.path ().empty ()) {
        path  = std::string (base.path ());
        query = relative.query ().empty () ? base.query () : relative.query ();
    } else {
        if (relative.path ().starts_with ("/")) {
            path = remove_dot_segments (relative.path ());
        } else {
            // A base with an authority but no path acts as if its path
            // were "/", so the reference is placed directly under the root.
            if (!base.authority ().empty () and base.path ().empty ())
                path = fmt::format ("/{}", relative.path ());
            else
                path = merge (base.path (), relative.path ());

            path = remove_dot_segments (path);
        }

        query = relative.query ();
    }

    return { scheme, authority, path, query, relative.fragment () };
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// Return a copy of `src` whose path has had "." and ".." segments removed.
// The other components are copied unchanged.
cruft::uri
cruft::normalise (cruft::uri const &src)
{
    auto const cleaned = remove_dot_segments (src.path ());

    cruft::uri res (src);
    res.set (uri::component::PATH, cleaned);
    return res;
}
|
@ -50,6 +50,7 @@ using cruft::uri;
|
||||
action query_begin { m_views[QUERY] = { p, p}; }
|
||||
action query_end { m_views[QUERY] = { m_views[QUERY].begin (), p }; }
|
||||
|
||||
|
||||
action fragment_begin { m_views[FRAGMENT] = { p, p}; }
|
||||
action fragment_end { m_views[FRAGMENT] = { m_views[FRAGMENT].begin (), p }; }
|
||||
|
||||
|
33
uri.hpp
33
uri.hpp
@ -3,7 +3,7 @@
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
*
|
||||
* Copyright 2015, 2017 Danny Robson <danny@nerdcruft.net>
|
||||
* Copyright 2015, 2017, 2021 Danny Robson <danny@nerdcruft.net>
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
@ -12,9 +12,11 @@
|
||||
#include "view.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
#include <iosfwd>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
|
||||
|
||||
namespace cruft {
|
||||
@ -42,6 +44,14 @@ namespace cruft {
|
||||
uri (const char *);
|
||||
uri (view<const char *>);
|
||||
|
||||
uri (
|
||||
std::string_view scheme,
|
||||
std::string_view authority,
|
||||
std::string_view path,
|
||||
std::string_view query,
|
||||
std::string_view fragment
|
||||
);
|
||||
|
||||
|
||||
class parse_error : public std::runtime_error
|
||||
{ using runtime_error::runtime_error; };
|
||||
@ -73,11 +83,9 @@ namespace cruft {
|
||||
};
|
||||
|
||||
std::string_view
|
||||
get (component c) const&
|
||||
{
|
||||
CHECK_INDEX (c, NUM_COMPONENTS);
|
||||
return { m_views[c].data (), m_views[c].size () };
|
||||
}
|
||||
get (component c) const&;
|
||||
|
||||
void set (component c, std::string_view);
|
||||
|
||||
std::string_view all (void) const& { return m_value; }
|
||||
std::string const& value (void) const& { return m_value; }
|
||||
@ -103,8 +111,13 @@ namespace cruft {
|
||||
std::string m_value;
|
||||
};
|
||||
|
||||
cruft::uri resolve (cruft::uri base, cruft::uri child);
|
||||
cruft::uri normalise (cruft::uri);
|
||||
std::map<std::string, std::string> query_to_map (std::string_view);
|
||||
std::string map_to_query (std::map<std::string, std::string> const&);
|
||||
|
||||
bool operator== (uri const&, uri const&) noexcept;
|
||||
|
||||
cruft::uri resolve (cruft::uri const &base, cruft::uri const &child);
|
||||
cruft::uri normalise (cruft::uri const &);
|
||||
|
||||
std::ostream& operator<< (std::ostream&, uri const&);
|
||||
std::ostream& operator<< (std::ostream&, uri::component);
|
||||
|
Loading…
Reference in New Issue
Block a user