uri: improve hierarchical parsing reliability

This commit is contained in:
Danny Robson 2021-12-21 13:56:28 +10:00
parent d86f60413e
commit 2488846e38
5 changed files with 173 additions and 47 deletions

View File

@ -112,21 +112,24 @@
## URI types
hier_part = (
'//' (authority path_abempty >path_begin %path_end) >hier_begin %hier_end
) | (
path_absolute >path_begin %path_end
| path_rootless >path_begin %path_end
| path_empty >path_begin %path_end
) >hier_begin %hier_end;
(
'//' (authority path_abempty >path_begin %path_end) >hier_begin %hier_end
) | (
path_absolute >path_begin %path_end
| path_rootless >path_begin %path_end
| path_empty >path_begin %path_end
) >hier_begin %hier_end
);
uri = scheme ':' hier_part ('?' query)? ('#' fragment)?;
relative_part =
'//' authority path_abempty >path_begin %path_end
| path_absolute >path_begin %path_end
relative_part = (
'//' (authority path_abempty >path_begin %path_end) >hier_begin %hier_end
) | (
path_absolute >path_begin %path_end
| path_noscheme >path_begin %path_end
| path_empty >path_begin %path_end
;
) >hier_begin %hier_end;
relative_ref = relative_part ('?' query)? ('#' fragment)?;

View File

@ -175,6 +175,7 @@ test_normalise (cruft::TAP::logger &tap)
{
struct {
char const *init;
char const *expected;
} TESTS[] = {
// {

122
uri.cpp
View File

@ -9,7 +9,74 @@
using cruft::uri;
///////////////////////////////////////////////////////////////////////////////
#include <cruft/util/debug/assert.hpp>
#include <cruft/util/debug/warn.hpp>
#include <cruft/util/iterator/zip.hpp>
#include <optional>
//-----------------------------------------------------------------------------
using uric = cruft::uri::component;
struct {
cruft::uri::component self;
// prev sibling
std::optional<cruft::uri::component> prev;
// parent
std::optional<cruft::uri::component> parent;
} COMPONENT_ORDER[cruft::uri::NUM_COMPONENTS] {
{ uric::SCHEME, { }, {} },
{ uric::HIERARCHICAL, {uric::SCHEME }, {} },
{ uric::QUERY, {uric::HIERARCHICAL}, {} },
{ uric::FRAGMENT, {uric::QUERY }, {} },
{ uric::AUTHORITY, { }, {uric::HIERARCHICAL} },
{ uric::PATH, {uric::AUTHORITY}, {uric::HIERARCHICAL} },
{ uric::USER, { }, {uric::AUTHORITY} },
{ uric::HOST, {uric::USER}, {uric::AUTHORITY} },
{ uric::PORT, {uric::HOST}, {uric::AUTHORITY} },
};
///////////////////////////////////////////////////////////////////////////////
cruft::uri::uri (std::string &&_value):
m_views {
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr
},
m_value (std::move (_value))
{
parse ();
for (auto const &order: COMPONENT_ORDER) {
if (m_views[order.self].data ())
continue;
if (order.prev)
m_views[order.self] = { m_views[*order.prev].end (), 0 };
else if (order.parent)
m_views[order.self] = { m_views[*order.parent].begin (), 0 };
else
m_views[order.self] = { m_value.data (), 0 };
}
CHECK_SANITY (*this);
}
//-----------------------------------------------------------------------------
cruft::uri::uri (const char *str):
uri (std::string (str))
{ ; }
@ -33,8 +100,11 @@ uri::uri (uri const &rhs)
, m_value (rhs.m_value)
{
auto const offset = rhs.m_value.data () - m_value.data ();
for (auto &i: m_views)
i = { i.begin () - offset, i.end () - offset };
i -= offset;
CHECK_SANITY (*this);
}
@ -47,7 +117,6 @@ uri& uri::operator= (uri &&rhs) noexcept
}
//-----------------------------------------------------------------------------
static std::string
combine_components (
@ -163,6 +232,11 @@ cruft::query_to_map (std::string_view val)
std::string
cruft::map_to_query (std::map<std::string, std::string> const &val)
{
// Test for empty up front so that we can simplify the string
// concatenation below.
if (val.empty ())
return "";
std::string res;
for (auto const &[k, v]: val) {
res += k;
@ -171,6 +245,9 @@ cruft::map_to_query (std::map<std::string, std::string> const &val)
res += '&';
}
// The string must be non-zero length because we've tested for the empty
// set initially.
CHECK (!res.empty ());
res.resize (res.size () - 1);
return res;
}
@ -400,4 +477,43 @@ cruft::normalise (cruft::uri const &src)
remove_dot_segments (src.path ())
);
return res;
}
}
///////////////////////////////////////////////////////////////////////////////
template <>
bool
cruft::debug::validator<cruft::uri>::is_valid (cruft::uri const &val) noexcept
{
auto const &value = val.value ();
auto const &components = val.components ();
// Each component should fall within the value string
for (auto const &i: components) {
RETURN_FALSE_UNLESS (i.begin () >= value.data ());
RETURN_FALSE_UNLESS (i.end () <= value.data () + value.size ());
}
// Each component reference memory after the previous component.
// Empty components can be coincident with their siblings.
for (auto const [selfidx, previdx, parentidx]: COMPONENT_ORDER) {
auto const &self = components[selfidx];
if (previdx) {
auto const &prev = components[*previdx];
RETURN_FALSE_UNLESS (prev.begin () <= self.begin ());
RETURN_FALSE_UNLESS (prev.end () <= self.end ());
RETURN_FALSE_UNLESS (prev.end () <= self.begin ());
}
if (parentidx) {
auto const &parent = components[*parentidx];
RETURN_FALSE_UNLESS (parent.begin () <= self.begin ());
RETURN_FALSE_UNLESS (parent.end () >= self.end ());
}
}
return true;
}

View File

@ -8,6 +8,8 @@
#include "uri.hpp"
#include <cruft/util/debug/assert.hpp>
#include <algorithm>
#include <iostream>
@ -26,36 +28,35 @@ using cruft::uri;
action success {__success = true; }
action failure {__success = false; }
action scheme_begin { m_views[SCHEME] = { p, p }; }
action scheme_end { m_views[SCHEME] = { m_views[SCHEME].begin (), p }; }
action scheme_begin { starts[SCHEME] = p; }
action scheme_end { CHECK (starts[SCHEME]); m_views[SCHEME] = { starts[SCHEME], p }; }
action hier_begin { m_views[HIERARCHICAL] = { p, p }; }
action hier_end { m_views[HIERARCHICAL] = { m_views[HIERARCHICAL].begin (), p }; }
action hier_begin { starts[HIERARCHICAL] = p; }
action hier_end { CHECK (starts[HIERARCHICAL]); m_views[HIERARCHICAL] = { starts[HIERARCHICAL], p }; }
action user_begin { m_views[USER] = { p, p }; }
action user_end { m_views[USER] = { m_views[USER].begin (), p }; }
action user_begin { starts[USER] = p; }
action user_end { CHECK (starts[USER]); m_views[USER] = { starts[USER], p }; }
action host_begin { m_views[HOST] = { p, p }; }
action host_end { m_views[HOST] = { m_views[HOST].begin (), p }; }
action host_begin { starts[HOST] = p; }
action host_end { CHECK (starts[HOST]); m_views[HOST] = { starts[HOST], p }; }
action port_begin { m_views[PORT] = { p, p }; }
action port_end { m_views[PORT] = { m_views[PORT].begin (), p }; }
action port_begin { starts[PORT] = p; }
action port_end { CHECK (starts[PORT]); m_views[PORT] = { starts[PORT], p }; }
action authority_begin { m_views[AUTHORITY] = { p, p}; }
action authority_end { m_views[AUTHORITY] = { m_views[AUTHORITY].begin (), p }; }
action authority_begin { starts[AUTHORITY] = p; }
action authority_end { CHECK (starts[AUTHORITY]); m_views[AUTHORITY] = { starts[AUTHORITY], p }; }
action path_begin { m_views[PATH] = { p, p}; }
action path_end { m_views[PATH] = { m_views[PATH].begin (), p }; }
action path_begin { starts[PATH] = p; }
action path_end { CHECK (starts[PATH]); m_views[PATH] = { starts[PATH], p }; }
action query_begin { m_views[QUERY] = { p, p}; }
action query_end { m_views[QUERY] = { m_views[QUERY].begin (), p }; }
action query_begin { starts[QUERY] = p; }
action query_end { CHECK (starts[QUERY]); m_views[QUERY] = { starts[QUERY], p }; }
action fragment_begin { m_views[FRAGMENT] = { p, p}; }
action fragment_end { m_views[FRAGMENT] = { m_views[FRAGMENT].begin (), p }; }
action fragment_begin { starts[FRAGMENT] = p; }
action fragment_end { CHECK (starts[FRAGMENT]); m_views[FRAGMENT] = { starts[FRAGMENT], p }; }
action uri_begin {}
action uri_end {}
action uri_end {}
include rfc3986 'rfc3986.rl';
@ -69,8 +70,12 @@ using cruft::uri;
///////////////////////////////////////////////////////////////////////////////
cruft::uri::uri (std::string &&_value):
m_views {
void
cruft::uri::parse (void)
{
char const *starts[NUM_COMPONENTS] = {};
m_views = {
nullptr,
nullptr,
nullptr,
@ -80,9 +85,8 @@ cruft::uri::uri (std::string &&_value):
nullptr,
nullptr,
nullptr
},
m_value (std::move (_value))
{
};
const char *p = m_value.data ();
const char *pe = m_value.data () + m_value.size ();
const char *eof = pe;

20
uri.hpp
View File

@ -69,15 +69,15 @@ namespace cruft {
// QUERY: 'foo=bar'
// FRAGMENT: 'fragment'
enum component {
SCHEME,
HIERARCHICAL,
AUTHORITY,
USER,
HOST,
PORT,
PATH,
QUERY,
FRAGMENT,
/* 0 */ SCHEME,
/* 1 */ HIERARCHICAL,
/* 2 */ AUTHORITY,
/* 3 */ USER,
/* 4 */ HOST,
/* 5 */ PORT,
/* 6 */ PATH,
/* 7 */ QUERY,
/* 8 */ FRAGMENT,
NUM_COMPONENTS
};
@ -108,6 +108,8 @@ namespace cruft {
static std::string percent_decode (view<const char*>);
private:
void parse (void);
std::array<view<const char*>, NUM_COMPONENTS> m_views;
std::string m_value;
};