#include "./uri.hpp"

#include "./cast.hpp"
#include "./string.hpp"
#include "./debug/panic.hpp"
#include "./debug/assert.hpp"
#include "./debug/warn.hpp"

// NOTE(review): the header name on this include was missing from the text
// under review. <algorithm> is what the std::find/std::copy calls in this
// file require; confirm against the original.
#include <algorithm>

using cruft::uri;


///////////////////////////////////////////////////////////////////////////////
/// Replaces every space in `str` with the percent-encoded triple "%20".
///
/// No other character is encoded.
static void
minimal_percent_encode (std::string &str)
{
    // Resume the search after the inserted triple; it contains no spaces.
    for (auto pos = str.find (' ');
         pos != std::string::npos;
         pos = str.find (' ', pos + 3))
    {
        str.replace (pos, 1, "%20");
    }
}


///////////////////////////////////////////////////////////////////////////////
/// Takes ownership of `_value`, percent-encodes any spaces, parses the
/// result into component offsets, and then fixes up the offsets of
/// components that were not found.
cruft::uri::uri (std::string &&_value):
    m_offsets {},
    m_value (std::move (_value))
{
    minimal_percent_encode (m_value);
    parse ();

    // Ensure the offsets are well-ordered.
    //
    // We want the QUERY offsets to come after the PATH offsets even if there
    // is no QUERY data (or at least be placed coincident with the PATH
    // offsets).
    for (int i = 1; i < NUM_COMPONENTS; ++i) {
        // We very specifically do not want to move any offset that has a
        // non-zero (ie, found) offset.
        //
        // If we do, this prevents us from computing `pqf` and retaining a
        // trailing "?" for an empty query.
        if (m_offsets[i].second or m_offsets[i].first)
            continue;

        m_offsets[i] = {
            m_offsets[i - 1].second,
            m_offsets[i - 1].second
        };
    }

    CHECK_SANITY (*this);
}


//-----------------------------------------------------------------------------
/// Copies the NUL-terminated string and delegates to the owning constructor.
cruft::uri::uri (const char *str):
    uri (std::string (str))
{ ; }


//-----------------------------------------------------------------------------
/// Copies the viewed range into a string and delegates to the owning
/// constructor.
//
// NOTE(review): the parameter type is reproduced exactly as it appeared in
// the text under review; if `cruft::view` is a class template its arguments
// may have been lost. Confirm against the declaration in uri.hpp.
uri::uri (cruft::view _value):
    uri (std::string (_value.begin (), _value.end ()))
{ ; }


//-----------------------------------------------------------------------------
/// Copies `_value` and delegates to the owning constructor.
uri::uri (const std::string &_value):
    uri (std::string (_value))
{ ; }


//-----------------------------------------------------------------------------
/// Concatenates the supplied components into a single URI string.
///
/// The delimiters (":", "//", "?", "#") are only emitted for components that
/// are non-empty; the path is always appended verbatim.
static std::string
combine_components (
    std::string_view scheme,
    std::string_view authority,
    std::string_view path,
    std::string_view query,
    std::string_view fragment
) {
    std::string res;

    // Upper bound on the result length: every component plus every
    // delimiter that might be emitted.
    res.reserve (
        scheme.size () + strlen ("://")
        + authority.size ()
        + path.size ()
        + strlen ("?") + query.size ()
        + strlen ("#") + fragment.size ()
    );

    if (!scheme.empty ()) {
        res += scheme;
        res += ":";
    }

    if (!authority.empty ()) {
        res += "//";
        res += authority;
    }

    res += path;

    if (!query.empty ()) {
        res += "?";
        res += query;
    }

    if (!fragment.empty ()) {
        res += "#";
        res += fragment;
    }

    return res;
}


//-----------------------------------------------------------------------------
/// Builds a URI from individual components by joining them into one string
/// and delegating to the owning constructor (which re-parses the result).
uri::uri (
    std::string_view scheme,
    std::string_view authority,
    std::string_view path,
    std::string_view query,
    std::string_view fragment
)
    : uri (combine_components (scheme, authority, path, query, fragment))
{ ; }


///////////////////////////////////////////////////////////////////////////////
/// Returns the span from the start of the first non-empty component in
/// USER..PATH through to the end of PATH.
///
/// If every component in that range is empty a zero-length view positioned
/// at the USER offset is returned.
std::string_view
uri::heirarchical (void) const&
{
    for (int i = USER; i <= PATH; ++i) {
        if (m_offsets[i].first != m_offsets[i].second) {
            return {
                m_value.data () + m_offsets[   i].first,
                m_value.data () + m_offsets[PATH].second
            };
        }
    }

    return { m_value.data () + m_offsets[USER].first, 0 };
}
//----------------------------------------------------------------------------- std::string_view uri::authority (void) const& { for (int i = USER; i <= PORT; ++i) if (m_offsets[i].first != m_offsets[i].second) return { m_value.data () + m_offsets[ i].first, m_value.data () + m_offsets[PORT].second }; return { m_value.data () + m_offsets[USER].first, 0 }; } /////////////////////////////////////////////////////////////////////////////// std::string_view uri::get (component c) const& { CHECK_INDEX (c, NUM_COMPONENTS); return { m_value.data () + m_offsets[c].first, m_value.data () + m_offsets[c].second, }; } //----------------------------------------------------------------------------- void uri::set (component c, std::string_view val) { std::ptrdiff_t const diff = val.size () - (m_offsets[c].second - m_offsets[c].first); m_value.replace ( m_offsets[c].first, m_offsets[c].second - m_offsets[c].first, val ); m_offsets[c].second = cruft::cast::lossless (m_offsets[c].first + val.size ()); for (int i = c + 1; i != component::NUM_COMPONENTS; ++i) { m_offsets[i].first += diff; m_offsets[i].second += diff; } CHECK_SANITY (*this); } //----------------------------------------------------------------------------- void uri::clear_fragment () { if (m_offsets[FRAGMENT].first == m_offsets[FRAGMENT].second) return; m_value.erase ( m_offsets[FRAGMENT].first, m_offsets[FRAGMENT].second - m_offsets[FRAGMENT].first ); CHECK (m_value.back () == '#'); m_value.pop_back (); // Don't set this to nullptr. Other code assumes these views aren't null // (eg, for offsetting during copy construction). 
m_offsets[FRAGMENT].first = m_offsets[FRAGMENT].second = m_offsets[FRAGMENT - 1].second; } /////////////////////////////////////////////////////////////////////////////// std::vector> cruft::query_to_vector (std::string_view val) { std::vector> res; for (auto const tok: cruft::tokeniser (val, '&')) { auto const &[k, v] = cruft::split_on (tok, '='); res.emplace_back ( std::string (k.begin (), k.size ()), std::string (v.begin (), v.size ()) ); } return res; } //----------------------------------------------------------------------------- std::string cruft::vector_to_query (std::vector> const &val) { // Test for empty up front so that we can simplify the string // concatenation below. if (val.empty ()) return ""; std::string res; for (auto const &[k, v]: val) { res += k; res += '='; res += v; res += '&'; } // The string must be non-zero length because we've tested for the empty // set initially. CHECK (!res.empty ()); res.resize (res.size () - 1); return res; } /////////////////////////////////////////////////////////////////////////////// std::array uri::components () const& noexcept { std::array res; for (int i = 0; i != NUM_COMPONENTS; ++i) res[i] = get (uri::component (i)); return res; } /////////////////////////////////////////////////////////////////////////////// bool cruft::operator== (cruft::uri const &a, cruft::uri const &b) noexcept { return a.value () == b.value (); } /////////////////////////////////////////////////////////////////////////////// static uint8_t hex_to_uint (char c) { if (c >= '0' && c <= '9') return c - '0'; if (c >= 'A' && c <= 'F') return c - 'A' + 10; if (c >= 'a' && c <= 'f') return c - 'a' + 10; unreachable (); } //----------------------------------------------------------------------------- std::string cruft::uri::percent_decode (view s) { if (s.size () == 0) return std::string (); // Early check for late percent-encoding so we can simplify the decode loop { auto tail = std::find (s.size () < 3 ? 
s.begin () : s.end () - 2, s.end (), '%'); if (tail != s.end ()) throw parse_error ("triple overlaps end"); } // Allocate and size a potentially overlong output string. This allows us // to copy directly into its buffer. We'll shorten it at the end. std::string out; out.resize (s.size ()); // Find the percent, copy until that, decode, advance, repeat. auto out_cursor = out.begin (); for (auto i = s.begin (); i < s.end (); ++i) { auto cursor = std::find (i, s.end (), '%'); if (cursor == s.end ()) { out_cursor = std::copy (i, s.end (), out_cursor); break; } out_cursor = std::copy (i, cursor, out_cursor); *out_cursor = hex_to_uint (cursor[1]) << 4 | hex_to_uint(cursor[2]); i += 3; } out.resize (out.end () - out_cursor); return out; } /////////////////////////////////////////////////////////////////////////////// std::ostream& cruft::operator<< (std::ostream &os, cruft::uri::component c) { switch (c) { case cruft::uri::SCHEME: return os << "SCHEME"; case cruft::uri::USER: return os << "USER"; case cruft::uri::HOST: return os << "HOST"; case cruft::uri::PORT: return os << "PORT"; case cruft::uri::PATH: return os << "PATH"; case cruft::uri::QUERY: return os << "QUERY"; case cruft::uri::FRAGMENT: return os << "FRAGMENT"; case cruft::uri::NUM_COMPONENTS: unreachable (); } unreachable (); } //----------------------------------------------------------------------------- std::ostream& cruft::operator<< (std::ostream &os, cruft::uri const &val) { return os << val.value (); } /////////////////////////////////////////////////////////////////////////////// static std::string merge (std::string_view base, std::string_view relative) { auto const slash = std::find (std::rbegin (base), std::rend (base), '/'); if (slash == std::rend (base)) return fmt::format ("/{}", relative); return fmt::format ( "{}/{}", std::string_view (base.begin (), std::distance (base.begin (), slash.base ()) - 1), relative ); } //----------------------------------------------------------------------------- 
static std::string remove_dot_segments (std::string_view path) { std::vector src; for (auto const &i: cruft::tokeniser (path, '/')) src.push_back (std::string_view (i.begin (), i.size ())); bool const absolute = !path.empty () && path[0] == '/'; bool const trailing = !src.empty () && (src.back () == "" or src.back () == "." or src.back () == ".."); std::vector dst; for (auto const &i: src) { if (i == "..") { if (!dst.empty ()) { if (dst.back () == "..") dst.push_back (i); else dst.pop_back (); } } else if (i != "." and i != "") { dst.push_back (i); } } std::string res = absolute ? "/" : ""; for (auto const &i: dst) { res.append (i); res.append ("/"); } if (!trailing) if (!res.empty ()) res.pop_back (); return res; } //----------------------------------------------------------------------------- // Uniform Resource Identifier (URI): Generic Syntax // https://www.ietf.org/rfc/rfc3986.txt // 5.2. Relative Resolution cruft::uri cruft::resolve (cruft::uri const &base, cruft::uri const &relative) { using namespace std::string_literals; if (!relative.scheme ().empty ()) return relative; std::string_view scheme = base.scheme (); std::string_view authority = base.authority (); std::string path = std::string (base.path ()); std::string_view query = base.query (); std::string_view fragment = base.fragment (); if (!relative.scheme ().empty ()) { scheme = relative.scheme (); authority = relative.authority (); path = remove_dot_segments (relative.path ()); query = relative.query (); } else { if (!relative.authority ().empty ()) { authority = relative.authority (); path = remove_dot_segments(relative.path ()); query = relative.query (); } else { if (relative.path ().empty ()) { path = base.path (); if (!relative.query ().empty ()) query = relative.query (); else query = base.query (); } else { if (relative.path ().starts_with ("/")) path = remove_dot_segments(relative.path ()); else { if (!base.authority ().empty () and base.path ().empty ()) path = fmt::format ("/{}", 
relative.path ()); else path = merge (base.path (), relative.path ()); path = remove_dot_segments (path); } query = relative.query (); } authority = base.authority (); } scheme = base.scheme (); } fragment = relative.fragment (); return { scheme, authority, path, query, fragment }; } /////////////////////////////////////////////////////////////////////////////// cruft::uri cruft::normalise (cruft::uri const &src) { auto res = src; res.set ( uri::component::PATH, remove_dot_segments (src.path ()) ); return res; } /////////////////////////////////////////////////////////////////////////////// template <> bool cruft::debug::validator::is_valid (cruft::uri const &val) noexcept { auto const &value = val.value (); auto const &components = val.components (); // Each component should fall within the value string for (auto const &i: components) { RETURN_FALSE_UNLESS (i.begin () >= value.data ()); RETURN_FALSE_UNLESS (i.end () <= value.data () + value.size ()); } for (int i = 1; i < uri::NUM_COMPONENTS; ++i) { auto const &a = components[i - 1]; auto const &b = components[i ]; // All of our data needs to occur before the next component // // It may not be coincident with a sibling if there is padding between // (as is the case directly after the scheme). RETURN_FALSE_UNLESS (a.begin () <= b.begin ()); RETURN_FALSE_UNLESS (a.end () <= b.end ()); RETURN_FALSE_UNLESS (a.end () <= b.begin ()); } return true; }