#include "./uri.hpp"

#include "./string.hpp"
#include "./debug/panic.hpp"

// NOTE(review): the targets of the system includes in this file were absent
// from the text under review. The lists below are re-derived from the names
// this translation unit uses; confirm them against the original include list.
#include <fmt/format.h>

using cruft::uri;


///////////////////////////////////////////////////////////////////////////////
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <optional>
#include <ostream>
#include <string>
#include <string_view>
#include <utility>
#include <vector>


//-----------------------------------------------------------------------------
using uric = cruft::uri::component;


// Layout relationships between URI components.
//
// Entries are listed so that the `prev` and `parent` of every component
// appear earlier in the table than the component itself. The constructor
// relies on this ordering when it synthesises empty views, and the sanity
// validator walks the same table.
//
// The table is read-only and its type is unnamed, so it is given internal
// linkage and made const.
static const struct {
    // The component this entry describes.
    cruft::uri::component self;

    // The sibling that immediately precedes `self`, if there is one.
    std::optional<cruft::uri::component> prev;

    // The component that encloses `self`, if there is one.
    std::optional<cruft::uri::component> parent;
} COMPONENT_ORDER[cruft::uri::NUM_COMPONENTS] {
    { uric::SCHEME,       {                  }, {                  } },
    { uric::HIERARCHICAL, {uric::SCHEME      }, {                  } },
    { uric::QUERY,        {uric::HIERARCHICAL}, {                  } },
    { uric::FRAGMENT,     {uric::QUERY       }, {                  } },

    { uric::AUTHORITY,    {                  }, {uric::HIERARCHICAL} },
    { uric::PATH,         {uric::AUTHORITY   }, {uric::HIERARCHICAL} },

    { uric::USER,         {                  }, {uric::AUTHORITY   } },
    { uric::HOST,         {uric::USER        }, {uric::AUTHORITY   } },
    { uric::PORT,         {uric::HOST        }, {uric::AUTHORITY   } },
};


///////////////////////////////////////////////////////////////////////////////
// Take ownership of `_value`, parse it, and make sure every component view
// refers to a location inside the stored string.
cruft::uri::uri (std::string &&_value):
    m_views {
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr,
        nullptr
    },
    m_value (std::move (_value))
{
    parse ();

    // Any view that still has a null data pointer after parsing is replaced
    // with a zero-length view: at the end of its previous sibling if it has
    // one, otherwise at the start of its parent, otherwise at the start of
    // the stored string.
    for (auto const &order: COMPONENT_ORDER) {
        if (m_views[order.self].data ())
            continue;

        if (order.prev)
            m_views[order.self] = { m_views[*order.prev].end (), 0 };
        else if (order.parent)
            m_views[order.self] = { m_views[*order.parent].begin (), 0 };
        else
            m_views[order.self] = { m_value.data (), 0 };
    }

    CHECK_SANITY (*this);
}


//-----------------------------------------------------------------------------
// Construct from a NUL terminated string by copying it and delegating.
cruft::uri::uri (const char *str):
    uri (std::string (str))
{ ; }


//-----------------------------------------------------------------------------
// Construct from a character range by copying it and delegating.
//
// NOTE(review): the view's template argument was absent from the text under
// review; `const char*` is assumed. Confirm against the declaration.
uri::uri (cruft::view<const char*> _value):
    uri (std::string (_value.begin (), _value.end ()))
{ ; }


//-----------------------------------------------------------------------------
// Construct from a string we may not modify by copying it and delegating.
uri::uri (const std::string &_value):
    uri (std::string (_value))
{ ; }
//----------------------------------------------------------------------------- uri::uri (uri const &rhs) : m_views (rhs.m_views) , m_value (rhs.m_value) { auto const offset = rhs.m_value.data () - m_value.data (); for (auto &i: m_views) i -= offset; CHECK_SANITY (*this); } //----------------------------------------------------------------------------- uri& uri::operator= (uri &&rhs) noexcept { m_views = std::move (rhs.m_views); m_value = std::move (rhs.m_value); return *this; } //----------------------------------------------------------------------------- static std::string combine_components ( std::string_view scheme, std::string_view authority, std::string_view path, std::string_view query, std::string_view fragment ) { std::string res; res.reserve ( scheme.size () + strlen ("://") + authority.size () + path.size () + strlen ("?") + query.size () + strlen ("#") + fragment.size () ); if (!scheme.empty ()) { res += scheme; res += ":"; } if (!authority.empty ()) { res += "//"; res += authority; } res += path; if (!query.empty ()) { res += "?"; res += query; } if (!fragment.empty ()) { res += "#"; res += fragment; } return res; } //----------------------------------------------------------------------------- uri::uri ( std::string_view scheme, std::string_view authority, std::string_view path, std::string_view query, std::string_view fragment ) : uri (combine_components (scheme, authority, path, query, fragment)) { ; } /////////////////////////////////////////////////////////////////////////////// std::string_view uri::get (component c) const& { CHECK_INDEX (c, NUM_COMPONENTS); return { m_views[c].data (), m_views[c].size () }; } //----------------------------------------------------------------------------- void uri::set (component c, std::string_view val) { auto const diff = val.size () - m_views[c].size (); m_value.replace ( m_views[c].data () - m_value.data (), m_views[c].size (), val ); for (int i = c + 1; i != component::NUM_COMPONENTS; ++i) m_views[i] += 
diff; } //----------------------------------------------------------------------------- void uri::clear_fragment () { if (!m_views[FRAGMENT]) return; m_value.erase ( m_views[FRAGMENT].begin () - m_value.data (), m_views[FRAGMENT].size () ); m_views[FRAGMENT] = nullptr; } /////////////////////////////////////////////////////////////////////////////// std::map cruft::query_to_map (std::string_view val) { std::map res; for (auto const tok: cruft::tokeniser (val, '&')) { auto const &[k, v] = cruft::split_on (tok, '='); res.emplace ( std::string (k.begin (), k.size ()), std::string (v.begin (), v.size ()) ); } return res; } //----------------------------------------------------------------------------- std::string cruft::map_to_query (std::map const &val) { // Test for empty up front so that we can simplify the string // concatenation below. if (val.empty ()) return ""; std::string res; for (auto const &[k, v]: val) { res += k; res += '='; res += v; res += '&'; } // The string must be non-zero length because we've tested for the empty // set initially. CHECK (!res.empty ()); res.resize (res.size () - 1); return res; } /////////////////////////////////////////////////////////////////////////////// bool cruft::operator== (cruft::uri const &a, cruft::uri const &b) noexcept { return a.value () == b.value (); } /////////////////////////////////////////////////////////////////////////////// static uint8_t hex_to_uint (char c) { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'A' && c <= 'F') return c - 'A' + 10; if (c >= 'a' && c <= 'f') return c - 'a' + 10; unreachable (); } //----------------------------------------------------------------------------- std::string cruft::uri::percent_decode (view s) { if (s.size () == 0) return std::string (); // Early check for late percent-encoding so we can simplify the decode loop { auto tail = std::find (s.size () < 3 ? 
s.begin () : s.end () - 2, s.end (), '%'); if (tail != s.end ()) throw parse_error ("triple overlaps end"); } // Allocate and size a potentially overlong output string. This allows us // to copy directly into its buffer. We'll shorten it at the end. std::string out; out.resize (s.size ()); // Find the percent, copy until that, decode, advance, repeat. auto out_cursor = out.begin (); for (auto i = s.begin (); i < s.end (); ++i) { auto cursor = std::find (i, s.end (), '%'); if (cursor == s.end ()) { out_cursor = std::copy (i, s.end (), out_cursor); break; } out_cursor = std::copy (i, cursor, out_cursor); *out_cursor = hex_to_uint (cursor[1]) << 4 | hex_to_uint(cursor[2]); i += 3; } out.resize (out.end () - out_cursor); return out; } /////////////////////////////////////////////////////////////////////////////// std::ostream& cruft::operator<< (std::ostream &os, cruft::uri::component c) { switch (c) { case cruft::uri::SCHEME: return os << "SCHEME"; case cruft::uri::HIERARCHICAL: return os << "HIERARCHICAL"; case cruft::uri::AUTHORITY: return os << "AUTHORITY"; case cruft::uri::USER: return os << "USER"; case cruft::uri::HOST: return os << "HOST"; case cruft::uri::PORT: return os << "PORT"; case cruft::uri::PATH: return os << "PATH"; case cruft::uri::QUERY: return os << "QUERY"; case cruft::uri::FRAGMENT: return os << "FRAGMENT"; case cruft::uri::NUM_COMPONENTS: unreachable (); } unreachable (); } //----------------------------------------------------------------------------- std::ostream& cruft::operator<< (std::ostream &os, cruft::uri const &val) { return os << val.value (); } /////////////////////////////////////////////////////////////////////////////// static std::string merge (std::string_view base, std::string_view relative) { auto const slash = std::find (std::rbegin (base), std::rend (base), '/'); if (slash == std::rend (base)) return fmt::format ("/{}", relative); return fmt::format ( "{}/{}", std::string_view (base.begin (), std::distance (base.begin (), 
slash.base ()) - 1), relative ); } //----------------------------------------------------------------------------- static std::string remove_dot_segments (std::string_view path) { std::vector src; for (auto const &i: cruft::tokeniser (path, '/')) src.push_back (std::string_view (i.begin (), i.size ())); bool const absolute = !path.empty () && path[0] == '/'; bool const trailing = !src.empty () && (src.back () == "" or src.back () == "." or src.back () == ".."); std::vector dst; for (auto const &i: src) { if (i == "..") { if (!dst.empty ()) { if (dst.back () == "..") dst.push_back (i); else dst.pop_back (); } } else if (i != "." and i != "") { dst.push_back (i); } } std::string res = absolute ? "/" : ""; for (auto const &i: dst) { res.append (i); res.append ("/"); } if (!trailing) if (!res.empty ()) res.pop_back (); return res; } //----------------------------------------------------------------------------- // Uniform Resource Identifier (URI): Generic Syntax // https://www.ietf.org/rfc/rfc3986.txt // 5.2. 
Relative Resolution cruft::uri cruft::resolve (cruft::uri const &base, cruft::uri const &relative) { using namespace std::string_literals; if (!relative.scheme ().empty ()) return relative; std::string_view scheme = base.scheme (); std::string_view authority = base.authority (); std::string path = std::string (base.path ()); std::string_view query = base.query (); std::string_view fragment = base.fragment (); if (!relative.scheme ().empty ()) { scheme = relative.scheme (); authority = relative.authority (); path = remove_dot_segments (relative.path ()); query = relative.query (); } else { if (!relative.authority ().empty ()) { authority = relative.authority (); path = remove_dot_segments(relative.path ()); query = relative.query (); } else { if (relative.path ().empty ()) { path = base.path (); if (!relative.query ().empty ()) query = relative.query (); else query = base.query (); } else { if (relative.path ().starts_with ("/")) path = remove_dot_segments(relative.path ()); else { if (!base.authority ().empty () and base.path ().empty ()) path = fmt::format ("/{}", relative.path ()); else path = merge (base.path (), relative.path ()); path = remove_dot_segments (path); } query = relative.query (); } authority = base.authority (); } scheme = base.scheme (); } fragment = relative.fragment (); return { scheme, authority, path, query, fragment }; } /////////////////////////////////////////////////////////////////////////////// cruft::uri cruft::normalise (cruft::uri const &src) { auto res = src; res.set ( uri::component::PATH, remove_dot_segments (src.path ()) ); return res; } /////////////////////////////////////////////////////////////////////////////// template <> bool cruft::debug::validator::is_valid (cruft::uri const &val) noexcept { auto const &value = val.value (); auto const &components = val.components (); // Each component should fall within the value string for (auto const &i: components) { RETURN_FALSE_UNLESS (i.begin () >= value.data ()); 
RETURN_FALSE_UNLESS (i.end () <= value.data () + value.size ()); } // Each component reference memory after the previous component. // Empty components can be coincident with their siblings. for (auto const [selfidx, previdx, parentidx]: COMPONENT_ORDER) { auto const &self = components[selfidx]; if (previdx) { auto const &prev = components[*previdx]; RETURN_FALSE_UNLESS (prev.begin () <= self.begin ()); RETURN_FALSE_UNLESS (prev.end () <= self.end ()); RETURN_FALSE_UNLESS (prev.end () <= self.begin ()); } if (parentidx) { auto const &parent = components[*parentidx]; RETURN_FALSE_UNLESS (parent.begin () <= self.begin ()); RETURN_FALSE_UNLESS (parent.end () >= self.end ()); } } return true; }