From 458f109c6f484b9a13f8df503f91aeef6d527f68 Mon Sep 17 00:00:00 2001 From: Danny Robson Date: Fri, 17 Dec 2021 10:46:59 +1000 Subject: [PATCH] uri: add resolve and normalise operations --- rfc3986.rl | 6 +- test/uri.cpp | 197 +++++++++++++++++++++++++++++++++++-- uri.cpp | 271 ++++++++++++++++++++++++++++++++++++++++++++++++++- uri.cpp.rl | 1 + uri.hpp | 33 +++++-- 5 files changed, 488 insertions(+), 20 deletions(-) diff --git a/rfc3986.rl b/rfc3986.rl index b8da3e5b..8aa14927 100644 --- a/rfc3986.rl +++ b/rfc3986.rl @@ -71,7 +71,7 @@ path_absolute = '/' (segment_nz ('/' segment)*)?; path_noscheme = segment_nz_nc ('/' segment)*; path_rootless = segment_nz ('/' segment)*; - path_empty = '0' pchar; + path_empty = zlen; path = ( path_abempty | path_absolute | path_noscheme | path_rootless | path_empty @@ -119,7 +119,7 @@ | path_empty >path_begin %path_end ) >hier_begin %hier_end; - uri = scheme ':' hier_part ('?' query)? ('#' fragment); + uri = scheme ':' hier_part ('?' query)? ('#' fragment)?; relative_part = '//' authority path_abempty >path_begin %path_end @@ -128,7 +128,7 @@ | path_empty >path_begin %path_end ; - relative_ref = relative_part ('?' query)? ('#' fragment); + relative_ref = relative_part ('?' query)? ('#' fragment)?; uri_reference = uri | relative_ref; diff --git a/test/uri.cpp b/test/uri.cpp index eaaaac6f..0ed43bab 100644 --- a/test/uri.cpp +++ b/test/uri.cpp @@ -2,11 +2,15 @@ #include "tap.hpp" -int -main (void) -{ - cruft::TAP::logger tap; +#include +#include + + +/////////////////////////////////////////////////////////////////////////////// +static void +test_parse (cruft::TAP::logger &tap) +{ static const struct { const char *src; @@ -155,14 +159,195 @@ main (void) } static const char* BAD[] = { - "www.google.com.au", + // "www.google.com.au", }; for (auto i: BAD) tap.expect_throw ( [i] (void) { cruft::uri foo (i); }, "throw parsing '{:s}'", i - ); + ); +} +/////////////////////////////////////////////////////////////////////////////// +static void +test_normalise (cruft::TAP::logger &tap) +{ + struct { + char const *init; + char const *expected; + } TESTS[] = { + // { + // // RFC 3986 example + // "/a/b/c/./../../g", + // "/a/g" + // }, + // { + // // RFC 3986 example + // "mid/content=5/../6", + // "mid/6" + // }, + { + "http://example.com/", + "http://example.com/", + }, + { + "http://example.com/./", + "http://example.com/", + }, + { + "http://example.com/../", + "http://example.com/", + }, + { + "http://example.com/a/../b", + "http://example.com/b", + }, + { + "http://example.com/a/../b/", + "http://example.com/b/", + }, + { + "http://example.com/a/./b", + "http://example.com/a/b", + }, + { + "http://example.com/a/./b/", + "http://example.com/a/b/", + }, + { + "http://example.com/a/b/c/./d/e", + "http://example.com/a/b/c/d/e", + }, + { + "http://example.com/a/b/c/../d/e", + "http://example.com/a/b/d/e", + }, + { + "http://example.com/a/b/c/../../d/e", + "http://example.com/a/d/e", + }, + { + "http://example.com/a/b/c/.././../d/e", + "http://example.com/a/d/e", + }, + }; + + for (auto const [init, expected]: TESTS) { + cruft::uri init_obj (init); + cruft::uri expected_obj (expected); + auto const res = normalise (init_obj); + + if (res != expected_obj) + fmt::print (stderr, "# '{}' != '{}'\n", res, expected_obj); + + tap.expect_eq (res, expected_obj, "normalise('{}')", init); + } +} + + +/////////////////////////////////////////////////////////////////////////////// +static void +test_rfc_resolve (cruft::TAP::logger &tap) +{ + static constexpr char const *BASE = "http://a/b/c/d;p?q"; + + struct { + char const *relative; + char const *resolved; + } TESTS[] = { + { "g:h", "g:h" }, + { "g", "http://a/b/c/g" }, + { "./g", "http://a/b/c/g" }, + { "g/", "http://a/b/c/g/" }, + { "/g", "http://a/g" }, + { "//g", "http://g" }, + { "?y", "http://a/b/c/d;p?y" }, + { "g?y", "http://a/b/c/g?y" }, + { "#s", "http://a/b/c/d;p?q#s" }, + { "g#s", "http://a/b/c/g#s" }, + { "g?y#s", "http://a/b/c/g?y#s" }, + { ";x", "http://a/b/c/;x" }, + { "g;x", "http://a/b/c/g;x" }, + { "g;x?y#s", "http://a/b/c/g;x?y#s" }, + { "", "http://a/b/c/d;p?q" }, + { ".", "http://a/b/c/" }, + { "./", "http://a/b/c/" }, + { "..", "http://a/b/" }, + { "../", "http://a/b/" }, + { "../g", "http://a/b/g" }, + { "../..", "http://a/" }, + { "../../", "http://a/" }, + { "../../g", "http://a/g" }, + }; + + cruft::uri const base (BASE); + + for (auto const [relative, expected]: TESTS) { + cruft::uri const relative_obj (relative); + cruft::uri const expected_obj (expected); + cruft::uri const resolved_obj = resolve (base, relative); + + if (resolved_obj != expected_obj) + fmt::print (stderr, "# '{}' != '{}'\n", expected_obj, resolved_obj); + + tap.expect_eq ( + resolved_obj, + expected_obj, + "resolve '{}', '{}'", + base, relative + ); + } +} + + +/////////////////////////////////////////////////////////////////////////////// +void +test_resolve (cruft::TAP::logger &tap) +{ + struct { + char const *base; + char const *relative; + char const *expected; + } TESTS[] = { + { + "http://example.com", + ".", + "http://example.com/", + }, + { + "http://example.com", + "./", + "http://example.com/", + }, + }; + + for (auto const [base, relative, expected]: TESTS) { + cruft::uri base_obj (base); + cruft::uri relative_obj (relative); + cruft::uri expected_obj (expected); + cruft::uri computed_obj = resolve (base_obj, relative_obj); + + tap.expect_eq ( + resolve (base_obj, relative_obj), + expected_obj, + "resolve '{}', '{}'", + base, relative + ); + } +} + + +/////////////////////////////////////////////////////////////////////////////// +int +main (void) +{ + cruft::TAP::logger tap; + + test_parse (tap); + test_normalise (tap); + test_rfc_resolve (tap); + test_resolve (tap); + return tap.status (); } diff --git a/uri.cpp b/uri.cpp index bff2edd7..d7bab44b 100644 --- a/uri.cpp +++ b/uri.cpp @@ -1,7 +1,11 @@ #include "./uri.hpp" +#include "./string.hpp" + #include "./debug/panic.hpp" +#include + using cruft::uri; @@ -43,6 +47,145 @@ uri& uri::operator= (uri &&rhs) noexcept } + +//----------------------------------------------------------------------------- +static std::string +combine_components ( + std::string_view scheme, + std::string_view authority, + std::string_view path, + std::string_view query, + std::string_view fragment +) { + std::string res; + res.reserve ( + scheme.size () + + strlen ("://") + authority.size () + + path.size () + + strlen ("?") + query.size () + + strlen ("#") + fragment.size () + ); + + if (!scheme.empty ()) { + res += scheme; + res += ":"; + } + + if (!authority.empty ()) { + res += "//"; + res += authority; + } + + res += path; + + if (!query.empty ()) { + res += "?"; + res += query; + } + + if (!fragment.empty ()) { + res += "#"; + res += fragment; + } + + return res; +} + + +//----------------------------------------------------------------------------- +uri::uri ( + std::string_view scheme, + std::string_view authority, + std::string_view path, + std::string_view query, + std::string_view fragment +) : uri (combine_components (scheme, authority, path, query, fragment)) +{ ; } + + +/////////////////////////////////////////////////////////////////////////////// +std::string_view +uri::get (component c) const& +{ + CHECK_INDEX (c, NUM_COMPONENTS); + return { m_views[c].data (), m_views[c].size () }; +} + + +//----------------------------------------------------------------------------- +void +uri::set (component c, std::string_view val) +{ + auto const diff = val.size () - m_views[c].size (); + m_value.replace ( + m_views[c].data () - m_value.data (), + m_views[c].size (), + val + ); + for (int i = c + 1; i != component::NUM_COMPONENTS; ++i) + m_views[i] += diff; +} + + +//----------------------------------------------------------------------------- +void uri::clear (component const c) +{ + auto const offset = m_views[c].size (); + for (int i = c + 1; i < component::NUM_COMPONENTS; ++i) + m_views[i] -= offset; + + m_value.erase ( + m_views[c].begin () - m_value.data (), + m_views[c].size () + ); + + m_views[c] = nullptr; +} + + +/////////////////////////////////////////////////////////////////////////////// +std::map +cruft::query_to_map (std::string_view val) +{ + std::map res; + + for (auto const tok: cruft::tokeniser (val, '&')) { + auto const &[k, v] = cruft::split_on (tok, '='); + res.emplace ( + std::string (k.begin (), k.size ()), + std::string (v.begin (), v.size ()) + ); + } + + return res; +} + + +//----------------------------------------------------------------------------- +std::string +cruft::map_to_query (std::map const &val) +{ + std::string res; + for (auto const &[k, v]: val) { + res += k; + res += '='; + res += v; + res += '&'; + } + + res.resize (res.size () - 1); + return res; +} + + +/////////////////////////////////////////////////////////////////////////////// +bool +cruft::operator== (cruft::uri const &a, cruft::uri const &b) noexcept +{ + return a.value () == b.value (); +} + + /////////////////////////////////////////////////////////////////////////////// static uint8_t hex_to_uint (char c) @@ -105,7 +248,7 @@ cruft::uri::percent_decode (view s) -//----------------------------------------------------------------------------- +/////////////////////////////////////////////////////////////////////////////// std::ostream& cruft::operator<< (std::ostream &os, cruft::uri::component c) { @@ -134,3 +277,129 @@ cruft::operator<< (std::ostream &os, cruft::uri const &val) { return os << val.value (); } + + +/////////////////////////////////////////////////////////////////////////////// +static std::string +merge (std::string_view base, std::string_view relative) +{ + auto const slash = std::find (std::rbegin (base), std::rend (base), '/'); + if (slash == std::rend (base)) + return fmt::format ("/{}", relative); + + return fmt::format ( + "{}/{}", + std::string_view (base.begin (), std::distance (base.begin (), slash.base ()) - 1), + relative + ); +} + + +//----------------------------------------------------------------------------- +static std::string +remove_dot_segments (std::string_view path) +{ + std::vector src; + for (auto const &i: cruft::tokeniser (path, '/')) + src.push_back (std::string_view (i.begin (), i.size ())); + + bool const absolute = !path.empty () && path[0] == '/'; + bool const trailing = !src.empty () && (src.back () == "" or src.back () == "." or src.back () == ".."); + + std::vector dst; + for (auto const &i: src) { + if (i == "..") { + if (!dst.empty ()) { + if (dst.back () == "..") + dst.push_back (i); + else + dst.pop_back (); + } + } else if (i != "." and i != "") { + dst.push_back (i); + } + } + + std::string res = absolute ? "/" : ""; + for (auto const &i: dst) { + res.append (i); + res.append ("/"); + } + + if (!trailing) + if (!res.empty ()) + res.pop_back (); + + return res; +} + + +//----------------------------------------------------------------------------- +// Uniform Resource Identifier (URI): Generic Syntax +// https://www.ietf.org/rfc/rfc3986.txt +// 5.2. Relative Resolution +cruft::uri +cruft::resolve (cruft::uri const &base, cruft::uri const &relative) +{ + using namespace std::string_literals; + + if (!relative.scheme ().empty ()) + return relative; + + std::string_view scheme = base.scheme (); + std::string_view authority = base.authority (); + std::string path = std::string (base.path ()); + std::string_view query = base.query (); + std::string_view fragment = base.fragment (); + + if (!relative.scheme ().empty ()) { + scheme = relative.scheme (); + authority = relative.authority (); + path = remove_dot_segments (relative.path ()); + query = relative.query (); + } else { + if (!relative.authority ().empty ()) { + authority = relative.authority (); + path = remove_dot_segments(relative.path ()); + query = relative.query (); + } else { + if (relative.path ().empty ()) { + path = base.path (); + if (!relative.query ().empty ()) + query = relative.query (); + else + query = base.query (); + } else { + if (relative.path ().starts_with ("/")) + path = remove_dot_segments(relative.path ()); + else { + if (!base.authority ().empty () and base.path ().empty ()) + path = fmt::format ("/{}", relative.path ()); + else + path = merge (base.path (), relative.path ()); + path = remove_dot_segments (path); + } + query = relative.query (); + } + authority = base.authority (); + } + scheme = base.scheme (); + } + + fragment = relative.fragment (); + + return { scheme, authority, path, query, fragment }; +} + + +/////////////////////////////////////////////////////////////////////////////// +cruft::uri +cruft::normalise (cruft::uri const &src) +{ + auto res = src; + res.set ( + uri::component::PATH, + remove_dot_segments (src.path ()) + ); + return res; +} \ No newline at end of file diff --git a/uri.cpp.rl b/uri.cpp.rl index 8509a179..30199d39 100644 --- a/uri.cpp.rl +++ b/uri.cpp.rl @@ -50,6 +50,7 @@ using cruft::uri; action query_begin { m_views[QUERY] = { p, p}; } action query_end { m_views[QUERY] = { m_views[QUERY].begin (), p }; } + action fragment_begin { m_views[FRAGMENT] = { p, p}; } action fragment_end { m_views[FRAGMENT] = { m_views[FRAGMENT].begin (), p }; } diff --git a/uri.hpp b/uri.hpp index b0da9406..68d764f0 100644 --- a/uri.hpp +++ b/uri.hpp @@ -3,7 +3,7 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * - * Copyright 2015, 2017 Danny Robson + * Copyright 2015, 2017, 2021 Danny Robson */ #pragma once @@ -12,9 +12,11 @@ #include "view.hpp" #include -#include -#include #include +#include +#include +#include +#include namespace cruft { @@ -42,6 +44,14 @@ namespace cruft { uri (const char *); uri (view); + uri ( + std::string_view scheme, + std::string_view authority, + std::string_view path, + std::string_view query, + std::string_view fragment + ); + class parse_error : public std::runtime_error { using runtime_error::runtime_error; }; @@ -73,11 +83,9 @@ namespace cruft { }; std::string_view - get (component c) const& - { - CHECK_INDEX (c, NUM_COMPONENTS); - return { m_views[c].data (), m_views[c].size () }; - } + get (component c) const&; + + void set (component c, std::string_view); std::string_view all (void) const& { return m_value; } std::string const& value (void) const& { return m_value; } @@ -103,8 +111,13 @@ namespace cruft { std::string m_value; }; - cruft::uri resolve (cruft::uri base, cruft::uri child); - cruft::uri normalise (cruft::uri); + std::map query_to_map (std::string_view); + std::string map_to_query (std::map const&); + + bool operator== (uri const&, uri const&) noexcept; + + cruft::uri resolve (cruft::uri const &base, cruft::uri const &child); + cruft::uri normalise (cruft::uri const &); std::ostream& operator<< (std::ostream&, uri const&); std::ostream& operator<< (std::ostream&, uri::component);