Danny Robson
f6056153e3
This places, at long last, the core library code into the same namespace as the extended library code.
222 lines
6.3 KiB
Ragel
222 lines
6.3 KiB
Ragel
/*
|
|
* This file is part of libgim.
|
|
*
|
|
* libgim is free software: you can redistribute it and/or modify it under the
|
|
* terms of the GNU General Public License as published by the Free Software
|
|
* Foundation, either version 3 of the License, or (at your option) any later
|
|
* version.
|
|
*
|
|
* libgim is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
|
* details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with libgim. If not, see <http://www.gnu.org/licenses/>.
|
|
*
|
|
* Copyright 2015 Danny Robson <danny@nerdcruft.net>
|
|
*/
|
|
|
|
#include "uri.hpp"
|
|
|
|
#include "debug.hpp"
|
|
#include "iterator.hpp"
|
|
|
|
#include <algorithm>
|
|
#include <iostream>
|
|
|
|
// We generate some really old-style C code via ragel here, so we have to
// disable some noisy warnings (doubly so given -Werror).
|
|
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
|
|
|
using cruft::uri;
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// Ragel machine `impl`: recognises a URI and records where each component
// lies within the text being parsed.
//
// Every `<component>_begin` action stores an empty view at the current parse
// position `p` in the matching m_views slot; the paired `<component>_end`
// action keeps that view's start and moves its end to `p`. Apart from the
// uri_begin/uri_end/success/failure/trace actions attached below, the actions
// are presumably referenced by the rules included from rfc3986.rl.
%%{
    machine impl;

    action trace   { if (0) std::cerr << *p; }
    action success { __success = true; }
    action failure { __success = false; }

    action scheme_begin { m_views[SCHEME] = { p, p }; }
    action scheme_end   { m_views[SCHEME] = { m_views[SCHEME].begin (), p }; }

    action hier_begin { m_views[HIERARCHICAL] = { p, p }; }
    action hier_end   { m_views[HIERARCHICAL] = { m_views[HIERARCHICAL].begin (), p }; }

    action user_begin { m_views[USER] = { p, p }; }
    action user_end   { m_views[USER] = { m_views[USER].begin (), p }; }

    action host_begin { m_views[HOST] = { p, p }; }
    action host_end   { m_views[HOST] = { m_views[HOST].begin (), p }; }

    action port_begin { m_views[PORT] = { p, p }; }
    action port_end   { m_views[PORT] = { m_views[PORT].begin (), p }; }

    action authority_begin { m_views[AUTHORITY] = { p, p }; }
    action authority_end   { m_views[AUTHORITY] = { m_views[AUTHORITY].begin (), p }; }

    action path_begin { m_views[PATH] = { p, p }; }
    action path_end   { m_views[PATH] = { m_views[PATH].begin (), p }; }

    action query_begin { m_views[QUERY] = { p, p }; }
    action query_end   { m_views[QUERY] = { m_views[QUERY].begin (), p }; }

    action fragment_begin { m_views[FRAGMENT] = { p, p }; }
    action fragment_end   { m_views[FRAGMENT] = { m_views[FRAGMENT].begin (), p }; }

    action uri_begin {}
    action uri_end   {}

    include rfc3986 'rfc3986.rl';

    impl := URI >uri_begin %uri_end
                %success
                $!failure
                $trace;

    write data;
}%%
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// URI
|
|
|
|
// Construct from a C string: the text is copied into a temporary std::string
// which is then handed to the std::string&& constructor for parsing.
cruft::uri::uri (const char *str):
    uri (std::string (str))
{ }
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
uri::uri (cruft::view<const char *> _value):
|
|
uri (std::string (_value.begin (), _value.end ()))
|
|
{ ; }
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
uri::uri (const std::string &_value):
|
|
uri (std::string (_value))
|
|
{ ; }
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// View whose begin and end are both null. The std::string&& constructor uses
// it to initialise every m_views slot before the parser runs.
static const cruft::view<const char*> NULL_VIEW { nullptr, nullptr };
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Take ownership of the URI text and run the generated `impl` machine over it.
//
// All nine component views start out as NULL_VIEW; the machine's actions
// overwrite the views of the components they encounter with ranges that point
// into m_value.
//
// Throws parse_error ("invalid uri") if the machine does not report success.
cruft::uri::uri (std::string &&_value):
    m_views {
        NULL_VIEW, NULL_VIEW, NULL_VIEW,
        NULL_VIEW, NULL_VIEW, NULL_VIEW,
        NULL_VIEW, NULL_VIEW, NULL_VIEW
    },
    m_value (std::move (_value))
{
    // Current state of the machine; set by `write init`.
    int cs;

    // Written by the machine's `success` and `failure` actions, so the name
    // must stay in sync with them.
    bool __success = false;

    // Parse cursor, end of buffer, and end of input. The whole text is
    // available up front, so end-of-input coincides with end-of-buffer.
    const char *p   = m_value.data ();
    const char *pe  = p + m_value.size ();
    const char *eof = pe;

    %%write init;
    %%write exec;

    if (!__success) {
        throw parse_error ("invalid uri");
    }
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
static uint8_t
|
|
hex_to_uint (char c)
|
|
{
|
|
if (c >= '0' && c <= '9')
|
|
return c - '0';
|
|
|
|
if (c >= 'A' && c <= 'F')
|
|
return c - 'A' + 10;
|
|
|
|
if (c >= 'a' && c <= 'f')
|
|
return c - 'a' + 10;
|
|
|
|
unreachable ();
|
|
}
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
std::string
|
|
cruft::uri::percent_decode (view<const char*> s)
|
|
{
|
|
if (s.size () == 0)
|
|
return std::string ();
|
|
|
|
// Early check for late percent-encoding so we can simplify the decode loop
|
|
{
|
|
auto tail = std::find (s.size () < 3 ? s.begin ()
|
|
: s.end () - 2,
|
|
s.end (),
|
|
'%');
|
|
if (tail != s.end ())
|
|
throw parse_error ("triple overlaps end");
|
|
}
|
|
|
|
// Allocate and size a potentially overlong output string. This allows us
|
|
// to copy directly into its buffer. We'll shorten it at the end.
|
|
std::string out;
|
|
out.resize (s.size ());
|
|
|
|
// Find the percent, copy until that, decode, advance, repeat.
|
|
auto out_cursor = out.begin ();
|
|
|
|
for (auto i = s.begin (); i < s.end (); ++i) {
|
|
auto cursor = std::find (i, s.end (), '%');
|
|
|
|
if (cursor == s.end ()) {
|
|
out_cursor = std::copy (i, s.end (), out_cursor);
|
|
break;
|
|
}
|
|
|
|
out_cursor = std::copy (i, cursor, out_cursor);
|
|
*out_cursor = hex_to_uint (cursor[1]) << 4 | hex_to_uint(cursor[2]);
|
|
|
|
i += 3;
|
|
}
|
|
|
|
out.resize (out.end () - out_cursor);
|
|
return out;
|
|
}
|
|
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
std::ostream&
|
|
cruft::operator<< (std::ostream &os, cruft::uri::component c)
|
|
{
|
|
switch (c) {
|
|
case cruft::uri::SCHEME: return os << "SCHEME";
|
|
case cruft::uri::HIERARCHICAL: return os << "HIERARCHICAL";
|
|
case cruft::uri::AUTHORITY: return os << "AUTHORITY";
|
|
case cruft::uri::USER: return os << "USER";
|
|
case cruft::uri::HOST: return os << "HOST";
|
|
case cruft::uri::PORT: return os << "PORT";
|
|
case cruft::uri::PATH: return os << "PATH";
|
|
case cruft::uri::QUERY: return os << "QUERY";
|
|
case cruft::uri::FRAGMENT: return os << "FRAGMENT";
|
|
|
|
case cruft::uri::NUM_COMPONENTS:
|
|
unreachable ();
|
|
}
|
|
|
|
unreachable ();
|
|
}
|