2015-02-11 16:18:18 +11:00
|
|
|
/*
|
|
|
|
* This file is part of libgim.
|
|
|
|
*
|
|
|
|
* libgim is free software: you can redistribute it and/or modify it under the
|
|
|
|
* terms of the GNU General Public License as published by the Free Software
|
|
|
|
* Foundation, either version 3 of the License, or (at your option) any later
|
|
|
|
* version.
|
|
|
|
*
|
|
|
|
* libgim is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
|
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
|
|
|
* details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with libgim. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*
|
|
|
|
* Copyright 2015 Danny Robson <danny@nerdcruft.net>
|
|
|
|
*/
|
|
|
|
|
2015-02-09 17:43:24 +11:00
|
|
|
#include "uri.hpp"
|
|
|
|
|
|
|
|
#include "debug.hpp"
|
2017-12-15 18:57:10 +11:00
|
|
|
#include "iterator.hpp"
|
2015-02-09 17:43:24 +11:00
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <iostream>
|
|
|
|
|
2018-05-03 18:32:08 +10:00
|
|
|
// We generate some really old style C code via ragel here, so we have to
|
|
|
|
// disable some noisy warnings (doubly so given -Werror)
|
|
|
|
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
|
|
|
|
2018-08-05 14:42:02 +10:00
|
|
|
using cruft::uri;
|
2015-02-09 17:43:24 +11:00
|
|
|
|
2017-12-26 17:33:06 +11:00
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
2015-02-09 17:43:24 +11:00
|
|
|
%%{
    # Ragel state machine used by the uri constructor to validate a URI and
    # to record where each of its components lives inside the input buffer.
    #
    # The host code must provide `p` (the cursor), `__success` and `m_views`.
    # The actions below are presumably referenced by name from the grammar in
    # rfc3986.rl, so their names must not change -- TODO confirm against that
    # file.
    machine impl;

    # Debug hook: would echo each consumed character, but is compiled out by
    # the constant-false condition.
    action trace { if (0) std::cerr << *p; }

    # Overall parse result, inspected by the host code after `write exec`.
    action success { __success = true; }
    action failure { __success = false; }

    # Each component has a pair of actions. `*_begin` opens an empty view at
    # the cursor; `*_end` keeps that start and closes the view at the cursor.
    action scheme_begin { m_views[SCHEME] = { p, p }; }
    action scheme_end   { m_views[SCHEME] = { m_views[SCHEME].begin (), p }; }

    action hier_begin { m_views[HIERARCHICAL] = { p, p }; }
    action hier_end   { m_views[HIERARCHICAL] = { m_views[HIERARCHICAL].begin (), p }; }

    action user_begin { m_views[USER] = { p, p }; }
    action user_end   { m_views[USER] = { m_views[USER].begin (), p }; }

    action host_begin { m_views[HOST] = { p, p }; }
    action host_end   { m_views[HOST] = { m_views[HOST].begin (), p }; }

    action port_begin { m_views[PORT] = { p, p }; }
    action port_end   { m_views[PORT] = { m_views[PORT].begin (), p }; }

    action authority_begin { m_views[AUTHORITY] = { p, p}; }
    action authority_end   { m_views[AUTHORITY] = { m_views[AUTHORITY].begin (), p }; }

    action path_begin { m_views[PATH] = { p, p}; }
    action path_end   { m_views[PATH] = { m_views[PATH].begin (), p }; }

    action query_begin { m_views[QUERY] = { p, p}; }
    action query_end   { m_views[QUERY] = { m_views[QUERY].begin (), p }; }

    action fragment_begin { m_views[FRAGMENT] = { p, p}; }
    action fragment_end   { m_views[FRAGMENT] = { m_views[FRAGMENT].begin (), p }; }

    # Whole-URI hooks; intentionally empty.
    action uri_begin {}
    action uri_end   {}

    # Pull in the `rfc3986` machine definition, which supplies `URI`.
    include rfc3986 'rfc3986.rl';

    # Entry point:
    #   >uri_begin  entering action
    #   %uri_end    leaving action
    #   %success    leaving action, marks the parse as good
    #   $!failure   error action embedded in every state
    #   $trace      action run on every transition
    impl := URI >uri_begin %uri_end
        %success
        $!failure
        $trace;

    # Emit the static tables for the machine.
    write data;
}%%
|
|
|
|
|
|
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
// URI
|
|
|
|
|
2018-08-05 14:42:02 +10:00
|
|
|
// Construct from a C string by copying it into a std::string and delegating
// to the string-taking constructor.
//
// `str` is handed straight to std::string's constructor, so it must be a
// valid NUL-terminated string.
cruft::uri::uri (const char *str)
    : uri (std::string (str))
{ }
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
2018-08-05 14:42:02 +10:00
|
|
|
// Construct from a character view: the viewed range [begin, end) is copied
// into a std::string which is then passed on to a string-taking constructor.
uri::uri (cruft::view<const char *> _value)
    : uri (std::string (_value.begin (), _value.end ()))
{ }
|
|
|
|
|
|
|
|
|
2017-12-26 17:33:06 +11:00
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
// Construct from an lvalue string: take a copy and delegate to the
// rvalue-string constructor, which does the actual parsing.
uri::uri (const std::string &_value)
    : uri (std::string (_value))
{ }
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
2018-08-05 14:42:02 +10:00
|
|
|
// File-local empty view (both ends null). The parsing constructor uses it to
// initialise every entry of m_views before the state machine runs.
static const cruft::view<const char*> NULL_VIEW { nullptr, nullptr };
|
2015-09-21 15:36:05 +10:00
|
|
|
|
|
|
|
|
2015-02-09 17:43:24 +11:00
|
|
|
//-----------------------------------------------------------------------------
|
2018-08-05 14:42:02 +10:00
|
|
|
cruft::uri::uri (std::string &&_value):
|
2017-12-15 18:57:10 +11:00
|
|
|
m_views {
|
|
|
|
NULL_VIEW,
|
|
|
|
NULL_VIEW,
|
|
|
|
NULL_VIEW,
|
|
|
|
NULL_VIEW,
|
|
|
|
NULL_VIEW,
|
|
|
|
NULL_VIEW,
|
|
|
|
NULL_VIEW,
|
|
|
|
NULL_VIEW,
|
|
|
|
NULL_VIEW
|
|
|
|
},
|
2015-02-09 17:43:24 +11:00
|
|
|
m_value (std::move (_value))
|
|
|
|
{
|
|
|
|
const char *p = m_value.data ();
|
|
|
|
const char *pe = m_value.data () + m_value.size ();
|
|
|
|
const char *eof = pe;
|
|
|
|
|
|
|
|
bool __success = false;
|
|
|
|
|
|
|
|
int cs;
|
|
|
|
|
|
|
|
%%write init;
|
|
|
|
%%write exec;
|
|
|
|
|
|
|
|
if (!__success)
|
|
|
|
throw parse_error ("invalid uri");
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
static uint8_t
|
|
|
|
hex_to_uint (char c)
|
|
|
|
{
|
|
|
|
if (c >= '0' && c <= '9')
|
|
|
|
return c - '0';
|
|
|
|
|
|
|
|
if (c >= 'A' && c <= 'F')
|
|
|
|
return c - 'A' + 10;
|
|
|
|
|
|
|
|
if (c >= 'a' && c <= 'f')
|
|
|
|
return c - 'a' + 10;
|
|
|
|
|
|
|
|
unreachable ();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
std::string
|
2018-08-05 14:42:02 +10:00
|
|
|
cruft::uri::percent_decode (view<const char*> s)
|
2015-02-09 17:43:24 +11:00
|
|
|
{
|
|
|
|
if (s.size () == 0)
|
|
|
|
return std::string ();
|
|
|
|
|
|
|
|
// Early check for late percent-encoding so we can simplify the decode loop
|
|
|
|
{
|
2015-02-11 16:41:09 +11:00
|
|
|
auto tail = std::find (s.size () < 3 ? s.begin ()
|
|
|
|
: s.end () - 2,
|
|
|
|
s.end (),
|
2015-02-09 17:43:24 +11:00
|
|
|
'%');
|
2015-02-11 16:41:09 +11:00
|
|
|
if (tail != s.end ())
|
2015-02-09 17:43:24 +11:00
|
|
|
throw parse_error ("triple overlaps end");
|
|
|
|
}
|
|
|
|
|
|
|
|
// Allocate and size a potentially overlong output string. This allows us
|
|
|
|
// to copy directly into its buffer. We'll shorten it at the end.
|
|
|
|
std::string out;
|
|
|
|
out.resize (s.size ());
|
|
|
|
|
|
|
|
// Find the percent, copy until that, decode, advance, repeat.
|
|
|
|
auto out_cursor = out.begin ();
|
|
|
|
|
2015-02-11 16:41:09 +11:00
|
|
|
for (auto i = s.begin (); i < s.end (); ++i) {
|
|
|
|
auto cursor = std::find (i, s.end (), '%');
|
2015-02-09 17:43:24 +11:00
|
|
|
|
2015-02-11 16:41:09 +11:00
|
|
|
if (cursor == s.end ()) {
|
|
|
|
out_cursor = std::copy (i, s.end (), out_cursor);
|
2015-02-09 17:43:24 +11:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
out_cursor = std::copy (i, cursor, out_cursor);
|
|
|
|
*out_cursor = hex_to_uint (cursor[1]) << 4 | hex_to_uint(cursor[2]);
|
|
|
|
|
|
|
|
i += 3;
|
|
|
|
}
|
|
|
|
|
|
|
|
out.resize (out.end () - out_cursor);
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-12-15 18:57:10 +11:00
|
|
|
|
2015-02-09 17:43:24 +11:00
|
|
|
//-----------------------------------------------------------------------------
|
|
|
|
std::ostream&
|
2018-08-05 14:42:02 +10:00
|
|
|
cruft::operator<< (std::ostream &os, cruft::uri::component c)
|
2015-02-09 17:43:24 +11:00
|
|
|
{
|
|
|
|
switch (c) {
|
2018-08-05 14:42:02 +10:00
|
|
|
case cruft::uri::SCHEME: return os << "SCHEME";
|
|
|
|
case cruft::uri::HIERARCHICAL: return os << "HIERARCHICAL";
|
|
|
|
case cruft::uri::AUTHORITY: return os << "AUTHORITY";
|
|
|
|
case cruft::uri::USER: return os << "USER";
|
|
|
|
case cruft::uri::HOST: return os << "HOST";
|
|
|
|
case cruft::uri::PORT: return os << "PORT";
|
|
|
|
case cruft::uri::PATH: return os << "PATH";
|
|
|
|
case cruft::uri::QUERY: return os << "QUERY";
|
|
|
|
case cruft::uri::FRAGMENT: return os << "FRAGMENT";
|
|
|
|
|
|
|
|
case cruft::uri::NUM_COMPONENTS:
|
2015-02-09 17:43:24 +11:00
|
|
|
unreachable ();
|
|
|
|
}
|
2016-05-12 17:45:29 +10:00
|
|
|
|
|
|
|
unreachable ();
|
2015-02-09 17:43:24 +11:00
|
|
|
}
|