uri: improve hierarchical parsing reliability
This commit is contained in:
parent
d86f60413e
commit
2488846e38
13
rfc3986.rl
13
rfc3986.rl
@ -112,21 +112,24 @@
|
|||||||
|
|
||||||
## URI types
|
## URI types
|
||||||
hier_part = (
|
hier_part = (
|
||||||
|
(
|
||||||
'//' (authority path_abempty >path_begin %path_end) >hier_begin %hier_end
|
'//' (authority path_abempty >path_begin %path_end) >hier_begin %hier_end
|
||||||
) | (
|
) | (
|
||||||
path_absolute >path_begin %path_end
|
path_absolute >path_begin %path_end
|
||||||
| path_rootless >path_begin %path_end
|
| path_rootless >path_begin %path_end
|
||||||
| path_empty >path_begin %path_end
|
| path_empty >path_begin %path_end
|
||||||
) >hier_begin %hier_end;
|
) >hier_begin %hier_end
|
||||||
|
);
|
||||||
|
|
||||||
uri = scheme ':' hier_part ('?' query)? ('#' fragment)?;
|
uri = scheme ':' hier_part ('?' query)? ('#' fragment)?;
|
||||||
|
|
||||||
relative_part =
|
relative_part = (
|
||||||
'//' authority path_abempty >path_begin %path_end
|
'//' (authority path_abempty >path_begin %path_end) >hier_begin %hier_end
|
||||||
| path_absolute >path_begin %path_end
|
) | (
|
||||||
|
path_absolute >path_begin %path_end
|
||||||
| path_noscheme >path_begin %path_end
|
| path_noscheme >path_begin %path_end
|
||||||
| path_empty >path_begin %path_end
|
| path_empty >path_begin %path_end
|
||||||
;
|
) >hier_begin %hier_end;
|
||||||
|
|
||||||
relative_ref = relative_part ('?' query)? ('#' fragment)?;
|
relative_ref = relative_part ('?' query)? ('#' fragment)?;
|
||||||
|
|
||||||
|
@ -175,6 +175,7 @@ test_normalise (cruft::TAP::logger &tap)
|
|||||||
{
|
{
|
||||||
struct {
|
struct {
|
||||||
char const *init;
|
char const *init;
|
||||||
|
|
||||||
char const *expected;
|
char const *expected;
|
||||||
} TESTS[] = {
|
} TESTS[] = {
|
||||||
// {
|
// {
|
||||||
|
120
uri.cpp
120
uri.cpp
@ -9,7 +9,74 @@
|
|||||||
using cruft::uri;
|
using cruft::uri;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
#include <cruft/util/debug/assert.hpp>
|
||||||
|
#include <cruft/util/debug/warn.hpp>
|
||||||
|
#include <cruft/util/iterator/zip.hpp>
|
||||||
|
|
||||||
|
#include <optional>
|
||||||
|
|
||||||
|
|
||||||
|
//-----------------------------------------------------------------------------
|
||||||
|
using uric = cruft::uri::component;
|
||||||
|
|
||||||
|
|
||||||
|
struct {
|
||||||
|
cruft::uri::component self;
|
||||||
|
// prev sibling
|
||||||
|
std::optional<cruft::uri::component> prev;
|
||||||
|
// parent
|
||||||
|
std::optional<cruft::uri::component> parent;
|
||||||
|
} COMPONENT_ORDER[cruft::uri::NUM_COMPONENTS] {
|
||||||
|
{ uric::SCHEME, { }, {} },
|
||||||
|
{ uric::HIERARCHICAL, {uric::SCHEME }, {} },
|
||||||
|
{ uric::QUERY, {uric::HIERARCHICAL}, {} },
|
||||||
|
{ uric::FRAGMENT, {uric::QUERY }, {} },
|
||||||
|
|
||||||
|
{ uric::AUTHORITY, { }, {uric::HIERARCHICAL} },
|
||||||
|
{ uric::PATH, {uric::AUTHORITY}, {uric::HIERARCHICAL} },
|
||||||
|
|
||||||
|
{ uric::USER, { }, {uric::AUTHORITY} },
|
||||||
|
{ uric::HOST, {uric::USER}, {uric::AUTHORITY} },
|
||||||
|
{ uric::PORT, {uric::HOST}, {uric::AUTHORITY} },
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
cruft::uri::uri (std::string &&_value):
|
||||||
|
m_views {
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
nullptr
|
||||||
|
},
|
||||||
|
m_value (std::move (_value))
|
||||||
|
{
|
||||||
|
parse ();
|
||||||
|
|
||||||
|
for (auto const &order: COMPONENT_ORDER) {
|
||||||
|
if (m_views[order.self].data ())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (order.prev)
|
||||||
|
m_views[order.self] = { m_views[*order.prev].end (), 0 };
|
||||||
|
else if (order.parent)
|
||||||
|
m_views[order.self] = { m_views[*order.parent].begin (), 0 };
|
||||||
|
else
|
||||||
|
m_views[order.self] = { m_value.data (), 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
CHECK_SANITY (*this);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//-----------------------------------------------------------------------------
|
||||||
cruft::uri::uri (const char *str):
|
cruft::uri::uri (const char *str):
|
||||||
uri (std::string (str))
|
uri (std::string (str))
|
||||||
{ ; }
|
{ ; }
|
||||||
@ -33,8 +100,11 @@ uri::uri (uri const &rhs)
|
|||||||
, m_value (rhs.m_value)
|
, m_value (rhs.m_value)
|
||||||
{
|
{
|
||||||
auto const offset = rhs.m_value.data () - m_value.data ();
|
auto const offset = rhs.m_value.data () - m_value.data ();
|
||||||
|
|
||||||
for (auto &i: m_views)
|
for (auto &i: m_views)
|
||||||
i = { i.begin () - offset, i.end () - offset };
|
i -= offset;
|
||||||
|
|
||||||
|
CHECK_SANITY (*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -47,7 +117,6 @@ uri& uri::operator= (uri &&rhs) noexcept
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//-----------------------------------------------------------------------------
|
//-----------------------------------------------------------------------------
|
||||||
static std::string
|
static std::string
|
||||||
combine_components (
|
combine_components (
|
||||||
@ -163,6 +232,11 @@ cruft::query_to_map (std::string_view val)
|
|||||||
std::string
|
std::string
|
||||||
cruft::map_to_query (std::map<std::string, std::string> const &val)
|
cruft::map_to_query (std::map<std::string, std::string> const &val)
|
||||||
{
|
{
|
||||||
|
// Test for empty up front so that we can simplify the string
|
||||||
|
// concatenation below.
|
||||||
|
if (val.empty ())
|
||||||
|
return "";
|
||||||
|
|
||||||
std::string res;
|
std::string res;
|
||||||
for (auto const &[k, v]: val) {
|
for (auto const &[k, v]: val) {
|
||||||
res += k;
|
res += k;
|
||||||
@ -171,6 +245,9 @@ cruft::map_to_query (std::map<std::string, std::string> const &val)
|
|||||||
res += '&';
|
res += '&';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The string must be non-zero length because we've tested for the empty
|
||||||
|
// map initially.
|
||||||
|
CHECK (!res.empty ());
|
||||||
res.resize (res.size () - 1);
|
res.resize (res.size () - 1);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
@ -401,3 +478,42 @@ cruft::normalise (cruft::uri const &src)
|
|||||||
);
|
);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
template <>
|
||||||
|
bool
|
||||||
|
cruft::debug::validator<cruft::uri>::is_valid (cruft::uri const &val) noexcept
|
||||||
|
{
|
||||||
|
auto const &value = val.value ();
|
||||||
|
auto const &components = val.components ();
|
||||||
|
|
||||||
|
// Each component should fall within the value string
|
||||||
|
for (auto const &i: components) {
|
||||||
|
RETURN_FALSE_UNLESS (i.begin () >= value.data ());
|
||||||
|
RETURN_FALSE_UNLESS (i.end () <= value.data () + value.size ());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Each component must reference memory at or after the end of its previous sibling.
|
||||||
|
// Empty components can be coincident with their siblings.
|
||||||
|
for (auto const [selfidx, previdx, parentidx]: COMPONENT_ORDER) {
|
||||||
|
auto const &self = components[selfidx];
|
||||||
|
|
||||||
|
if (previdx) {
|
||||||
|
auto const &prev = components[*previdx];
|
||||||
|
|
||||||
|
RETURN_FALSE_UNLESS (prev.begin () <= self.begin ());
|
||||||
|
RETURN_FALSE_UNLESS (prev.end () <= self.end ());
|
||||||
|
RETURN_FALSE_UNLESS (prev.end () <= self.begin ());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parentidx) {
|
||||||
|
auto const &parent = components[*parentidx];
|
||||||
|
|
||||||
|
RETURN_FALSE_UNLESS (parent.begin () <= self.begin ());
|
||||||
|
RETURN_FALSE_UNLESS (parent.end () >= self.end ());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
52
uri.cpp.rl
52
uri.cpp.rl
@ -8,6 +8,8 @@
|
|||||||
|
|
||||||
#include "uri.hpp"
|
#include "uri.hpp"
|
||||||
|
|
||||||
|
#include <cruft/util/debug/assert.hpp>
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
@ -26,33 +28,32 @@ using cruft::uri;
|
|||||||
action success {__success = true; }
|
action success {__success = true; }
|
||||||
action failure {__success = false; }
|
action failure {__success = false; }
|
||||||
|
|
||||||
action scheme_begin { m_views[SCHEME] = { p, p }; }
|
action scheme_begin { starts[SCHEME] = p; }
|
||||||
action scheme_end { m_views[SCHEME] = { m_views[SCHEME].begin (), p }; }
|
action scheme_end { CHECK (starts[SCHEME]); m_views[SCHEME] = { starts[SCHEME], p }; }
|
||||||
|
|
||||||
action hier_begin { m_views[HIERARCHICAL] = { p, p }; }
|
action hier_begin { starts[HIERARCHICAL] = p; }
|
||||||
action hier_end { m_views[HIERARCHICAL] = { m_views[HIERARCHICAL].begin (), p }; }
|
action hier_end { CHECK (starts[HIERARCHICAL]); m_views[HIERARCHICAL] = { starts[HIERARCHICAL], p }; }
|
||||||
|
|
||||||
action user_begin { m_views[USER] = { p, p }; }
|
action user_begin { starts[USER] = p; }
|
||||||
action user_end { m_views[USER] = { m_views[USER].begin (), p }; }
|
action user_end { CHECK (starts[USER]); m_views[USER] = { starts[USER], p }; }
|
||||||
|
|
||||||
action host_begin { m_views[HOST] = { p, p }; }
|
action host_begin { starts[HOST] = p; }
|
||||||
action host_end { m_views[HOST] = { m_views[HOST].begin (), p }; }
|
action host_end { CHECK (starts[HOST]); m_views[HOST] = { starts[HOST], p }; }
|
||||||
|
|
||||||
action port_begin { m_views[PORT] = { p, p }; }
|
action port_begin { starts[PORT] = p; }
|
||||||
action port_end { m_views[PORT] = { m_views[PORT].begin (), p }; }
|
action port_end { CHECK (starts[PORT]); m_views[PORT] = { starts[PORT], p }; }
|
||||||
|
|
||||||
action authority_begin { m_views[AUTHORITY] = { p, p}; }
|
action authority_begin { starts[AUTHORITY] = p; }
|
||||||
action authority_end { m_views[AUTHORITY] = { m_views[AUTHORITY].begin (), p }; }
|
action authority_end { CHECK (starts[AUTHORITY]); m_views[AUTHORITY] = { starts[AUTHORITY], p }; }
|
||||||
|
|
||||||
action path_begin { m_views[PATH] = { p, p}; }
|
action path_begin { starts[PATH] = p; }
|
||||||
action path_end { m_views[PATH] = { m_views[PATH].begin (), p }; }
|
action path_end { CHECK (starts[PATH]); m_views[PATH] = { starts[PATH], p }; }
|
||||||
|
|
||||||
action query_begin { m_views[QUERY] = { p, p}; }
|
action query_begin { starts[QUERY] = p; }
|
||||||
action query_end { m_views[QUERY] = { m_views[QUERY].begin (), p }; }
|
action query_end { CHECK (starts[QUERY]); m_views[QUERY] = { starts[QUERY], p }; }
|
||||||
|
|
||||||
|
action fragment_begin { starts[FRAGMENT] = p; }
|
||||||
action fragment_begin { m_views[FRAGMENT] = { p, p}; }
|
action fragment_end { CHECK (starts[FRAGMENT]); m_views[FRAGMENT] = { starts[FRAGMENT], p }; }
|
||||||
action fragment_end { m_views[FRAGMENT] = { m_views[FRAGMENT].begin (), p }; }
|
|
||||||
|
|
||||||
action uri_begin {}
|
action uri_begin {}
|
||||||
action uri_end {}
|
action uri_end {}
|
||||||
@ -69,8 +70,12 @@ using cruft::uri;
|
|||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
cruft::uri::uri (std::string &&_value):
|
void
|
||||||
m_views {
|
cruft::uri::parse (void)
|
||||||
|
{
|
||||||
|
char const *starts[NUM_COMPONENTS] = {};
|
||||||
|
|
||||||
|
m_views = {
|
||||||
nullptr,
|
nullptr,
|
||||||
nullptr,
|
nullptr,
|
||||||
nullptr,
|
nullptr,
|
||||||
@ -80,9 +85,8 @@ cruft::uri::uri (std::string &&_value):
|
|||||||
nullptr,
|
nullptr,
|
||||||
nullptr,
|
nullptr,
|
||||||
nullptr
|
nullptr
|
||||||
},
|
};
|
||||||
m_value (std::move (_value))
|
|
||||||
{
|
|
||||||
const char *p = m_value.data ();
|
const char *p = m_value.data ();
|
||||||
const char *pe = m_value.data () + m_value.size ();
|
const char *pe = m_value.data () + m_value.size ();
|
||||||
const char *eof = pe;
|
const char *eof = pe;
|
||||||
|
20
uri.hpp
20
uri.hpp
@ -69,15 +69,15 @@ namespace cruft {
|
|||||||
// QUERY: 'foo=bar'
|
// QUERY: 'foo=bar'
|
||||||
// FRAGMENT: 'fragment'
|
// FRAGMENT: 'fragment'
|
||||||
enum component {
|
enum component {
|
||||||
SCHEME,
|
/* 0 */ SCHEME,
|
||||||
HIERARCHICAL,
|
/* 1 */ HIERARCHICAL,
|
||||||
AUTHORITY,
|
/* 2 */ AUTHORITY,
|
||||||
USER,
|
/* 3 */ USER,
|
||||||
HOST,
|
/* 4 */ HOST,
|
||||||
PORT,
|
/* 5 */ PORT,
|
||||||
PATH,
|
/* 6 */ PATH,
|
||||||
QUERY,
|
/* 7 */ QUERY,
|
||||||
FRAGMENT,
|
/* 8 */ FRAGMENT,
|
||||||
|
|
||||||
NUM_COMPONENTS
|
NUM_COMPONENTS
|
||||||
};
|
};
|
||||||
@ -108,6 +108,8 @@ namespace cruft {
|
|||||||
static std::string percent_decode (view<const char*>);
|
static std::string percent_decode (view<const char*>);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
void parse (void);
|
||||||
|
|
||||||
std::array<view<const char*>, NUM_COMPONENTS> m_views;
|
std::array<view<const char*>, NUM_COMPONENTS> m_views;
|
||||||
std::string m_value;
|
std::string m_value;
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user