uri: add more data fields
We now provide more fine-grained extraction of fields from URIs, focusing primarily on those that are useful for URLs.
This commit is contained in:
parent
c1036d8337
commit
b4175e4593
155
test/uri.cpp
155
test/uri.cpp
@ -10,32 +10,149 @@ main (void)
|
||||
|
||||
static const struct {
|
||||
const char *src;
|
||||
|
||||
const char *scheme;
|
||||
const char *hierarchical;
|
||||
const char *authority;
|
||||
const char *user;
|
||||
const char *host;
|
||||
const char *port;
|
||||
const char *path;
|
||||
const char *query;
|
||||
const char *fragment;
|
||||
} GOOD[] = {
|
||||
{ "ftp://ftp.is.co.za/rfc/rfc1808.txt", "ftp", "ftp.is.co.za", "/rfc/rfc1808.txt", "", "" },
|
||||
{ "http://www.ietf.org/rfc/rfc2396.txt", "http", "www.ietf.org", "/rfc/rfc2396.txt", "", "" },
|
||||
{ "ldap://[2001:db8::7]/c=GB?objectClass?one", "ldap", "[2001:db8::7]", "/c=GB", "objectClass?one", "" },
|
||||
{ "mailto:John.Doe@example.com", "mailto", "", "John.Doe@example.com", "", "" },
|
||||
{ "news:comp.infosystems.www.servers.unix", "news", "", "comp.infosystems.www.servers.unix", "", "" },
|
||||
{ "tel:+1-816-555-1212", "tel", "", "+1-816-555-1212", "", "" },
|
||||
{ "telnet://192.0.2.16:80/", "telnet", "192.0.2.16:80", "/", "", "" },
|
||||
{ "urn:oasis:names:specification:docbook:dtd:xml:4.1.2", "urn", "", "oasis:names:specification:docbook:dtd:xml:4.1.2", "", "" },
|
||||
// examples from RFC 3986, section 1.1.2
|
||||
{
|
||||
.src = "ftp://ftp.is.co.za/rfc/rfc1808.txt",
|
||||
|
||||
.scheme = "ftp",
|
||||
.hierarchical = "ftp.is.co.za/rfc/rfc1808.txt",
|
||||
.authority = "ftp.is.co.za",
|
||||
.user = "",
|
||||
.host = "ftp.is.co.za",
|
||||
.port = "",
|
||||
.path = "/rfc/rfc1808.txt",
|
||||
.query = "",
|
||||
.fragment = "" },
|
||||
{
|
||||
.src = "http://www.ietf.org/rfc/rfc2396.txt",
|
||||
|
||||
.scheme = "http",
|
||||
.hierarchical = "www.ietf.org/rfc/rfc2396.txt",
|
||||
.authority = "www.ietf.org",
|
||||
.user = "",
|
||||
.host = "www.ietf.org",
|
||||
.port = "",
|
||||
.path = "/rfc/rfc2396.txt",
|
||||
.query = "",
|
||||
.fragment = "" },
|
||||
|
||||
{
|
||||
.src = "ldap://[2001:db8::7]/c=GB?objectClass?one",
|
||||
|
||||
.scheme = "ldap",
|
||||
.hierarchical = "[2001:db8::7]/c=GB",
|
||||
.authority = "[2001:db8::7]",
|
||||
.user = "",
|
||||
.host = "[2001:db8::7]",
|
||||
.port = "",
|
||||
.path = "/c=GB",
|
||||
.query = "objectClass?one",
|
||||
.fragment = "" },
|
||||
|
||||
{
|
||||
.src = "mailto:John.Doe@example.com",
|
||||
|
||||
.scheme= "mailto",
|
||||
.hierarchical = "John.Doe@example.com",
|
||||
.authority= "",
|
||||
.user = "",
|
||||
.host = "",
|
||||
.port = "",
|
||||
.path= "John.Doe@example.com",
|
||||
.query= "",
|
||||
.fragment= "" },
|
||||
|
||||
{
|
||||
.src = "news:comp.infosystems.www.servers.unix",
|
||||
.scheme= "news",
|
||||
.hierarchical = "comp.infosystems.www.servers.unix",
|
||||
.authority= "",
|
||||
.user = "",
|
||||
.host = "",
|
||||
.port = "",
|
||||
.path= "comp.infosystems.www.servers.unix",
|
||||
.query= "",
|
||||
.fragment= "" },
|
||||
|
||||
{
|
||||
.src = "tel:+1-816-555-1212",
|
||||
|
||||
.scheme= "tel",
|
||||
.hierarchical = "+1-816-555-1212",
|
||||
.authority= "",
|
||||
.user = "",
|
||||
.host = "",
|
||||
.port = "",
|
||||
.path= "+1-816-555-1212",
|
||||
.query= "",
|
||||
.fragment= "" },
|
||||
|
||||
{
|
||||
.src = "telnet://192.0.2.16:80/",
|
||||
|
||||
.scheme= "telnet",
|
||||
.hierarchical = "192.0.2.16:80/",
|
||||
.authority= "192.0.2.16:80",
|
||||
.user = "",
|
||||
.host = "192.0.2.16",
|
||||
.port = "80",
|
||||
.path= "/",
|
||||
.query= "",
|
||||
.fragment= "" },
|
||||
|
||||
{
|
||||
.src = "urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
|
||||
|
||||
.scheme= "urn",
|
||||
.hierarchical = "oasis:names:specification:docbook:dtd:xml:4.1.2",
|
||||
.authority= "",
|
||||
.user = "",
|
||||
.host = "",
|
||||
.port = "",
|
||||
.path= "oasis:names:specification:docbook:dtd:xml:4.1.2",
|
||||
.query= "",
|
||||
.fragment= "" },
|
||||
|
||||
|
||||
// a case with all possible components
|
||||
{
|
||||
.src = "https://user:password@example.com:80/path/to?foo=bar#fragment",
|
||||
|
||||
.scheme = "https",
|
||||
.hierarchical = "user:password@example.com:80/path/to",
|
||||
.authority = "user:password@example.com:80",
|
||||
.user = "user:password",
|
||||
.host = "example.com",
|
||||
.port = "80",
|
||||
.path = "/path/to",
|
||||
.query = "foo=bar",
|
||||
.fragment = "fragment" },
|
||||
};
|
||||
|
||||
for (auto i: GOOD) {
|
||||
tap.expect_nothrow ([i] (void) { util::uri foo (i.src); }, "nothrow parsing '%s'", i.src);
|
||||
for (auto t: GOOD) {
|
||||
tap.expect_nothrow ([t] (void) { util::uri foo (t.src); }, "nothrow parsing '%s'", t.src);
|
||||
util::uri u (t.src);
|
||||
|
||||
util::uri u (i.src);
|
||||
|
||||
tap.expect (std::equal (u.get (util::uri::SCHEME).begin (), u.get (util::uri::SCHEME).end (), i.scheme), "extracting scheme for '%s'", i.src);
|
||||
tap.expect (std::equal (u.get (util::uri::AUTHORITY).begin (), u.get (util::uri::AUTHORITY).end (), i.authority), "extracting authority '%s'", i.src);
|
||||
tap.expect (std::equal (u.get (util::uri::PATH).begin (), u.get (util::uri::PATH).end (), i.path), "extracting path '%s'", i.src);
|
||||
tap.expect (std::equal (u.get (util::uri::QUERY).begin (), u.get (util::uri::QUERY).end (), i.query), "extracting query '%s'", i.src);
|
||||
tap.expect (std::equal (u.get (util::uri::FRAGMENT).begin (), u.get (util::uri::FRAGMENT).end (), i.fragment), "extracting fragment '%s'", i.src);
|
||||
tap.expect (equal (u.scheme (), t.scheme), "scheme for '%s'", t.src);
|
||||
tap.expect (equal (u.heirarchical (), t.hierarchical), "hierarchical for '%s'", t.src);
|
||||
tap.expect (equal (u.authority (), t.authority), "authority for '%s'", t.src);
|
||||
tap.expect (equal (u.host (), t.host), "host for '%s'", t.src);
|
||||
tap.expect (equal (u.user (), t.user), "user for '%s'", t.src);
|
||||
tap.expect (equal (u.port (), t.port), "port for '%s'", t.src);
|
||||
tap.expect (equal (u.path (), t.path), "path for '%s'", t.src);
|
||||
tap.expect (equal (u.query (), t.query), "query for '%s'", t.src);
|
||||
tap.expect (equal (u.fragment (), t.fragment), "fragment for '%s'", t.src);
|
||||
}
|
||||
|
||||
static const char* BAD[] = {
|
||||
@ -43,7 +160,9 @@ main (void)
|
||||
};
|
||||
|
||||
for (auto i: BAD)
|
||||
tap.expect_throw<util::uri::parse_error> ([i] (void) { util::uri foo (i); }, "throw parsing '%s'", i);
|
||||
tap.expect_throw<util::uri::parse_error> (
|
||||
[i] (void) { util::uri foo (i); }, "throw parsing '%s'", i
|
||||
);
|
||||
|
||||
|
||||
return tap.status ();
|
||||
|
@ -43,23 +43,26 @@ static const struct {
|
||||
};
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//-----------------------------------------------------------------------------
|
||||
int
|
||||
main () {
|
||||
|
||||
util::TAP::logger tap;
|
||||
|
||||
for (const auto &i: PARSE_TESTS) {
|
||||
util::version v (i.str);
|
||||
for (const auto &t: PARSE_TESTS) {
|
||||
util::version v (t.str);
|
||||
|
||||
tap.expect (std::equal (v.begin (), v.end (), i.parts) && v.release == i.release, "%s", i.msg);
|
||||
bool parts = std::equal (v.begin (), v.end (), t.parts);
|
||||
bool release = v.release == t.release;
|
||||
|
||||
tap.expect (parts && release, "%s", t.msg);
|
||||
}
|
||||
|
||||
|
||||
for (const auto &t: CMP_TESTS) {
|
||||
bool eq = t.a == t.b,
|
||||
lt = t.a < t.b,
|
||||
gt = t.a > t.b;
|
||||
const bool eq = t.a == t.b;
|
||||
const bool lt = t.a < t.b;
|
||||
const bool gt = t.a > t.b;
|
||||
|
||||
tap.expect (t.eq == eq, "%s: equality", t.msg);
|
||||
tap.expect (t.lt == lt, "%s: less-than", t.msg);
|
||||
|
84
uri.cpp.rl
84
uri.cpp.rl
@ -20,6 +20,7 @@
|
||||
#include "uri.hpp"
|
||||
|
||||
#include "debug.hpp"
|
||||
#include "iterator.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
@ -32,19 +33,31 @@
|
||||
action success {__success = true; }
|
||||
action failure {__success = false; }
|
||||
|
||||
action scheme_begin { m_views[SCHEME] = { p, nullptr }; }
|
||||
action scheme_begin { m_views[SCHEME] = { p, p }; }
|
||||
action scheme_end { m_views[SCHEME] = { m_views[SCHEME].begin (), p }; }
|
||||
|
||||
action authority_begin { m_views[AUTHORITY] = { p, nullptr}; }
|
||||
action hier_begin { m_views[HIERARCHICAL] = { p, p }; }
|
||||
action hier_end { m_views[HIERARCHICAL] = { m_views[HIERARCHICAL].begin (), p }; }
|
||||
|
||||
action user_begin { m_views[USER] = { p, p }; }
|
||||
action user_end { m_views[USER] = { m_views[USER].begin (), p }; }
|
||||
|
||||
action host_begin { m_views[HOST] = { p, p }; }
|
||||
action host_end { m_views[HOST] = { m_views[HOST].begin (), p }; }
|
||||
|
||||
action port_begin { m_views[PORT] = { p, p }; }
|
||||
action port_end { m_views[PORT] = { m_views[PORT].begin (), p }; }
|
||||
|
||||
action authority_begin { m_views[AUTHORITY] = { p, p}; }
|
||||
action authority_end { m_views[AUTHORITY] = { m_views[AUTHORITY].begin (), p }; }
|
||||
|
||||
action path_begin { m_views[PATH] = { p, nullptr}; }
|
||||
action path_begin { m_views[PATH] = { p, p}; }
|
||||
action path_end { m_views[PATH] = { m_views[PATH].begin (), p }; }
|
||||
|
||||
action query_begin { m_views[QUERY] = { p, nullptr}; }
|
||||
action query_begin { m_views[QUERY] = { p, p}; }
|
||||
action query_end { m_views[QUERY] = { m_views[QUERY].begin (), p }; }
|
||||
|
||||
action fragment_begin { m_views[FRAGMENT] = { p, nullptr}; }
|
||||
action fragment_begin { m_views[FRAGMENT] = { p, p}; }
|
||||
action fragment_end { m_views[FRAGMENT] = { m_views[FRAGMENT].begin (), p }; }
|
||||
|
||||
## Characters
|
||||
@ -100,9 +113,18 @@
|
||||
reserved = gen_delim | sub_delim;
|
||||
|
||||
## Authority
|
||||
port = digit*;
|
||||
host = ip_literal | ipv4address | reg_name;
|
||||
userinfo = (unreserved | pct_encoded | sub_delim | ':')*;
|
||||
port = (
|
||||
digit*
|
||||
) >port_begin %port_end;
|
||||
|
||||
host = (
|
||||
ip_literal | ipv4address | reg_name
|
||||
) >host_begin %host_end;
|
||||
|
||||
userinfo = (
|
||||
(unreserved | pct_encoded | sub_delim | ':')*
|
||||
) >user_begin %user_end;
|
||||
|
||||
authority = (
|
||||
(userinfo '@')? host (':' port)?
|
||||
) >authority_begin %authority_end;
|
||||
@ -122,12 +144,13 @@
|
||||
) >fragment_begin %fragment_end;
|
||||
|
||||
## URI types
|
||||
hier_part =
|
||||
'//' authority path_abempty >path_begin %path_end
|
||||
| path_absolute >path_begin %path_end
|
||||
hier_part = (
|
||||
'//' (authority path_abempty >path_begin %path_end) >hier_begin %hier_end
|
||||
) | (
|
||||
path_absolute >path_begin %path_end
|
||||
| path_rootless >path_begin %path_end
|
||||
| path_empty >path_begin %path_end
|
||||
;
|
||||
) >hier_begin %hier_end;
|
||||
|
||||
uri = scheme ':' hier_part ('?' query)? ('#' fragment);
|
||||
|
||||
@ -175,7 +198,17 @@ static const util::view<const char*> NULL_VIEW { nullptr, nullptr };
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
util::uri::uri (std::string &&_value):
|
||||
m_views {NULL_VIEW, NULL_VIEW, NULL_VIEW, NULL_VIEW, NULL_VIEW},
|
||||
m_views {
|
||||
NULL_VIEW,
|
||||
NULL_VIEW,
|
||||
NULL_VIEW,
|
||||
NULL_VIEW,
|
||||
NULL_VIEW,
|
||||
NULL_VIEW,
|
||||
NULL_VIEW,
|
||||
NULL_VIEW,
|
||||
NULL_VIEW
|
||||
},
|
||||
m_value (std::move (_value))
|
||||
{
|
||||
const char *p = m_value.data ();
|
||||
@ -196,7 +229,7 @@ util::uri::uri (std::string &&_value):
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
util::view<const char*>
|
||||
util::uri::get (util::uri::component c)
|
||||
util::uri::get (util::uri::component c) const
|
||||
{
|
||||
CHECK_NEQ (c, NUM_COMPONENTS);
|
||||
return m_views[c];
|
||||
@ -264,16 +297,21 @@ util::uri::percent_decode (view<const char*> s)
|
||||
}
|
||||
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
std::ostream&
|
||||
util::operator<< (std::ostream &os, util::uri::component c)
|
||||
{
|
||||
switch (c) {
|
||||
case util::uri::SCHEME: return os << "SCHEME";
|
||||
case util::uri::AUTHORITY: return os << "AUTHORITY";
|
||||
case util::uri::PATH: return os << "PATH";
|
||||
case util::uri::QUERY: return os << "QUERY";
|
||||
case util::uri::FRAGMENT: return os << "FRAGMENT";
|
||||
case util::uri::SCHEME: return os << "SCHEME";
|
||||
case util::uri::HIERARCHICAL: return os << "HIERARCHICAL";
|
||||
case util::uri::AUTHORITY: return os << "AUTHORITY";
|
||||
case util::uri::USER: return os << "USER";
|
||||
case util::uri::HOST: return os << "HOST";
|
||||
case util::uri::PORT: return os << "PORT";
|
||||
case util::uri::PATH: return os << "PATH";
|
||||
case util::uri::QUERY: return os << "QUERY";
|
||||
case util::uri::FRAGMENT: return os << "FRAGMENT";
|
||||
|
||||
case util::uri::NUM_COMPONENTS:
|
||||
unreachable ();
|
||||
@ -281,3 +319,11 @@ util::operator<< (std::ostream &os, util::uri::component c)
|
||||
|
||||
unreachable ();
|
||||
}
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Stream a uri for display: writes '[', then the uri's component views
// passed through util::make_infix, then ']'. Returns the same stream.
// NOTE(review): make_infix is defined elsewhere; presumably it joins the
// components with a separator -- confirm against iterator.hpp.
std::ostream&
|
||||
util::operator<< (std::ostream &os, const util::uri &val)
|
||||
{
|
||||
return os << '[' << util::make_infix (val.components ()) << ']';
|
||||
}
|
||||
|
56
uri.hpp
56
uri.hpp
@ -11,20 +11,33 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Copyright 2015 Danny Robson <danny@nerdcruft.net>
|
||||
* Copyright 2015, 2017 Danny Robson <danny@nerdcruft.net>
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __UTIL_URI_HPP
|
||||
#define __UTIL_URI_HPP
|
||||
#ifndef CRUFT_UTIL_URI_HPP
|
||||
#define CRUFT_UTIL_URI_HPP
|
||||
|
||||
#include "view.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
namespace util {
|
||||
// parsing of rfc3986 uniform resource identifiers
|
||||
//
|
||||
// does not currently perform normalisation (scheme or protocol),
|
||||
// comparison, or other associated operations, though these should be
|
||||
// added in the future.
|
||||
//
|
||||
// note that the parsed results may not always conform to expectations
|
||||
// for some protocols. eg, mailto identifiers are complex to parse
|
||||
// reliably and would require a specialised parser.
|
||||
//
|
||||
// not all fields will be present for all protocols (or all instances of
|
||||
// any given protocol). eg, a "tel" URI is unlikely to have a port number.
|
||||
class uri {
|
||||
public:
|
||||
explicit uri (std::string &&);
|
||||
@ -34,25 +47,54 @@ namespace util {
|
||||
class parse_error : public std::runtime_error
|
||||
{ using runtime_error::runtime_error; };
|
||||
|
||||
|
||||
// URI: 'https://user:password@example.com:80/path/to?foo=bar#fragment'
|
||||
//
|
||||
// SCHEME: 'https'
|
||||
// HIERARCHICAL: 'user:password@example.com:80/path/to'
|
||||
// AUTHORITY: 'user:password@example.com:80'
|
||||
// USER: 'user:password'
|
||||
// HOST: 'example.com'
|
||||
// PORT: '80'
|
||||
// PATH: '/path/to'
|
||||
// QUERY: 'foo=bar'
|
||||
// FRAGMENT: 'fragment'
|
||||
enum component {
|
||||
SCHEME,
|
||||
AUTHORITY,
|
||||
PATH,
|
||||
HIERARCHICAL,
|
||||
AUTHORITY,
|
||||
USER,
|
||||
HOST,
|
||||
PORT,
|
||||
PATH,
|
||||
QUERY,
|
||||
FRAGMENT,
|
||||
|
||||
NUM_COMPONENTS
|
||||
};
|
||||
|
||||
view<const char*> get (component);
|
||||
view<const char*> get (component) const;
|
||||
|
||||
view<const char*> scheme (void) const { return get (SCHEME); }
|
||||
view<const char*> heirarchical (void) const { return get (HIERARCHICAL); }
|
||||
view<const char*> authority (void) const { return get (AUTHORITY); }
|
||||
view<const char*> user (void) const { return get (USER); }
|
||||
view<const char*> host (void) const { return get (HOST); }
|
||||
view<const char*> port (void) const { return get (PORT); }
|
||||
view<const char*> path (void) const { return get (PATH); }
|
||||
view<const char*> query (void) const { return get (QUERY); }
|
||||
view<const char*> fragment (void) const { return get (FRAGMENT); }
|
||||
|
||||
auto components (void) const noexcept { return m_views; }
|
||||
|
||||
static std::string percent_decode (view<const char*>);
|
||||
|
||||
private:
|
||||
view<const char*> m_views[NUM_COMPONENTS];
|
||||
std::array<view<const char*>, NUM_COMPONENTS> m_views;
|
||||
std::string m_value;
|
||||
};
|
||||
|
||||
std::ostream& operator<< (std::ostream&, const uri&);
|
||||
std::ostream& operator<< (std::ostream&, uri::component);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user