json: support sax- and dom-like interfaces

This commit is contained in:
Danny Robson 2015-02-02 23:00:38 +11:00
parent cfc0f8ecd0
commit 0e1eaa91d3
5 changed files with 345 additions and 221 deletions

View File

@ -14,7 +14,7 @@
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public License
* along with libgim. If not, see <http://www.gnu.org/licenses/>. * along with libgim. If not, see <http://www.gnu.org/licenses/>.
* *
* Copyright 2010-2012 Danny Robson <danny@nerdcruft.net> * Copyright 2010-2015 Danny Robson <danny@nerdcruft.net>
*/ */
@ -41,215 +41,172 @@ using namespace std;
using namespace util; using namespace util;
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Parsing
struct parse_context {
parse_context(json::tree::node *_root):
root (_root),
value (NULL),
key (NULL),
start (NULL),
stop (NULL)
{ ; }
json::tree::node *root,
*value,
*key;
const char *start,
*stop;
};
%%{ %%{
# JSON (rfc7159)
machine json; machine json;
## Record whether parsing was successful for future use action trace { if (false) std::cerr << *p; }
action success action success { __success = true; }
{ __success = true; } action failure { }
action failure { action new_line { ++line; }
__success = false;
/*std::cerr << std::endl action first { parsed.push_back ({ type::UNKNOWN, p, p}); }
<< "Failure on: '" << fc << "' in level " << top << " at " << fpc - p action last { parsed.back ().last = p; }
<< std::endl;
*/ action tag_nul { parsed.back ().tag = type::NUL; }
} action tag_boolean { parsed.back ().tag = type::BOOLEAN; }
action tag_string { parsed.back ().tag = type::STRING; }
action tag_integer { parsed.back ().tag = type::INTEGER; }
action tag_real { parsed.back ().tag = type::REAL; }
action tag_object_begin { parsed.push_back ({ type::OBJECT_BEGIN, p, p + 1 }); }
action tag_object_end { parsed.push_back ({ type::OBJECT_END, p, p + 1 }); }
action tag_array_begin { parsed.push_back ({ type::ARRAY_BEGIN, p, p + 1 }); }
action tag_array_end { parsed.push_back ({ type::ARRAY_END, p, p + 1 }); }
# Line counter
lines = (
any | '\n' @new_line
)*;
# UTF-8 (rfc3629)
utf8_tail = 0x80..0xbf;
utf8_1 = 0x00..0x7f;
utf8_2 = 0xc2..0xdf utf8_tail;
utf8_3 = 0xe0 0xa0..0xbf utf8_tail |
0xe1..0xec utf8_tail{2} |
0xed 0x80..0x9f utf8_tail |
0xee..0xef utf8_tail{2};
utf8_4 = 0xf0 0x90..0xbf utf8_tail{2} |
0xf1..0xf3 utf8_tail{3} |
0xf4 0x80..0x8f utf8_tail{2};
action new_object { nodestack.push_back (parse_context(new json::tree::object)); } utf8 = utf8_1 | utf8_2 | utf8_3 | utf8_4;
action new_array { nodestack.push_back (parse_context(new json::tree::array)); }
action new_object_value { # Utility
CHECK (nodestack.back ().root->is_object ()); ws = 0x20 | 0x09 | 0x0A | 0x0D;
CHECK (nodestack.back ().key); array_start = '[';
CHECK (nodestack.back ().value); array_end = ']';
object_start = '{';
object_end = '}';
if (!nodestack.back ().key->is_string ()) # Strings
throw parse_error ("object keys must be strings"); char =
(utf8 - ["\\])
| "\\" (
[\\"/bfnrt]
| "u" xdigit{4}
)
;
json::tree::object *object = (json::tree::object*)nodestack.back ().root; string = ('"' char* '"') >first >tag_string %*last;
object->insert (nodestack.back ().key->as_string (),
unique_ptr<json::tree::node> (nodestack.back ().value));
nodestack.back ().key = NULL;
nodestack.back ().value = NULL;
}
action new_array_value { # numbers
CHECK (nodestack.back ().root->is_array ()); int = '0' | [1-9] digit*;
CHECK (nodestack.back ().value);
json::tree::array *array = (json::tree::array *)nodestack.back ().root; frac = '.' digit+;
array->insert (unique_ptr<json::tree::node> (nodestack.back ().value)); e = 'e'i[+\-]?;
nodestack.back ().value = NULL; exp = e digit+;
}
action new_string { number = (
CHECK (!nodestack.empty ()); '-'?
CHECK (!nodestack.back ().value); int
(frac >tag_real)?
exp?
) >tag_integer;
std::string value (std::string (nodestack.back ().start, # wrapper types
nodestack.back ().stop)); array = array_start @{ fhold; fcall array_members; } array_end;
nodestack.back ().value = new json::tree::string(value); object = object_start @{ fhold; fcall object_members; } object_end;
}
action new_boolean { # simple types; case sensitive literals
CHECK (!nodestack.empty ()); bool = ("true" | "false") >tag_boolean;
CHECK (!nodestack.back ().value); nul = "null" >tag_nul;
literal = bool | nul;
throw parse_error ("unable to parse boolean"); value = object | array | (number | string | literal) >first %last;
}
action new_number { # Complex
CHECK (!nodestack.empty ()); member = string ws* ':' ws* value;
parse_context &back = nodestack.back (); array_members := ((
CHECK (!back.value); array_start >tag_array_begin ws* (value ws* (',' ws* value ws*)*)? array_end >tag_array_end
CHECK (back.start); ) & lines)
CHECK (back.stop); @{ fhold; fret; } $trace $!failure;
CHECK_LE (back.start, back.stop);
errno = 0; object_members := ((
char *end; object_start >tag_object_begin ws* (member ws* (',' ws* member ws*)*)? object_end >tag_object_end
double value = strtod (back.start, &end); ) & lines)
if (end == back.start || errno) @{ fhold; fret; } $trace $!failure;
throw parse_error ("unable to parse number");
back.value = new json::tree::number (value);
}
action new_null { # meta types
CHECK (!nodestack.empty ()); document := ((ws* value ws*) & lines)
CHECK (!nodestack.back ().value); %success
$!failure
$trace;
nodestack.back().value = new json::tree::null (); variable stack ragelstack;
} prepush { ragelstack.push_back (0); }
postpop { ragelstack.pop_back (); }
action new_object_key {
CHECK (!nodestack.empty ());
CHECK (nodestack.back ().root->is_object ());
CHECK (nodestack.back ().value);
CHECK (!nodestack.back ().key);
nodestack.back ().key = nodestack.back ().value;
nodestack.back ().value = NULL;
}
prepush {
fsmstack.push_back (0);
}
postpop {
fsmstack.pop_back ();
__root = nodestack.back ().root;
if (nodestack.size () > 1)
(nodestack.rbegin () + 1)->value = nodestack.back ().root;
nodestack.pop_back ();
}
variable stack fsmstack;
alphtype char;
## numerical
exp = [eE]('-' | '+')? digit+;
frac = '.' digit+;
int = '-'? [1-9]? digit+;
number = int ( frac
| exp
| frac exp)?;
## textual
char =
any - (cntrl | '\"' | '\\')
| '\\\"'
| '\\\\'
| '\\/'
| '\\b'
| '\\f'
| '\\n'
| '\\r'
| '\\t'
| '\\u' xdigit{4};
string = ('"'
char* >{ nodestack.back ().start = fpc; }
%{ nodestack.back ().stop = fpc; })
'"'
@new_string;
## other
boolean =
'true' @{ nodestack.back ().value = new json::tree::boolean ( true); }
| 'false' @{ nodestack.back ().value = new json::tree::boolean (false); };
## components
object = '{' @{ fhold; fcall _object; } '}';
array = '[' @{ fhold; fcall _array; } ']';
value =
string
| boolean
| number >{ nodestack.back ().start = fpc; } %{ nodestack.back ().stop = fpc; } %new_number
| object
| array
| 'null' %new_null;
## compound data types
_array := ('[' @new_array
space* ((value %new_array_value space* ',' space*)* value %new_array_value space*)?
']')
$!failure
@{ fhold; fret; };
pair = string %new_object_key space* ':' space* value %new_object_value;
_object := ('{' @new_object
space* ((pair space* ',' space*)* pair space*)?
'}')
$!failure
@{ fhold; fret; };
json := (space* (object | array) space*)
$!failure
%success
>{ __success = false; };
write data; write data;
}%% }%%
std::ostream& operator<< (std::ostream &os, json::flat::type);
//-----------------------------------------------------------------------------
// Tokenise the JSON text in [first, last) into a flat sequence of items by
// running the Ragel `json` machine defined above.
//
// Returns the items recorded by the machine's actions. Each item refers back
// into the caller's buffer through raw pointers, so the buffer must outlive
// the returned vector.
//
// Throws parse_error, carrying the newline count reached, when the machine
// does not finish in an accepting state.
//
// The local names below are referenced directly by the machine's actions and
// by the code Ragel generates; they must not be renamed independently.
std::vector<json::flat::item>
json::flat::parse (const char *first, const char *last)
{
// Ragel's cursor, end-of-buffer and end-of-input pointers. The whole
// document is supplied in a single buffer, so eof coincides with pe.
const char *p = first;
const char *pe = last;
const char *eof = pe;
// Call stack for fcall/fret; grown and shrunk by the prepush/postpop hooks.
std::deque<int> ragelstack;
// Output sequence, appended to by the first/tag_* actions.
std::vector<item> parsed;
// Incremented by the new_line action for every '\n' consumed (zero-based).
size_t line = 0;
// Current machine state and ragelstack depth; set up by `write init`.
int cs, top;
// Only the `success` action sets this, so any other exit is a failure.
bool __success = false;
%%write init;
%%write exec;
if (!__success)
throw parse_error (line, "parse error");
return parsed;
}
// Tokenise the JSON document stored in the file at `path`.
//
// The file is mapped into memory and the mapped byte range is handed to the
// pointer-range overload of parse.
//
// NOTE(review): the returned items hold raw pointers into the mapping, which
// is a local of this function. Presumably util::mapped_file releases the
// mapping on destruction, which would leave item::first/last dangling once
// this returns -- confirm before dereferencing them in callers.
std::vector<json::flat::item>
json::flat::parse (const boost::filesystem::path &path)
{
    util::mapped_file mapping (path);

    const char *const head = (const char *)mapping.cbegin ();
    const char *const tail = (const char *)mapping.cend ();

    return parse (head, tail);
}
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// External support // External support
template <> template <>
bool bool
is_integer (const json::tree::number &node) { is_integer (const json::tree::number &node)
{
return is_integer (node.native ()); return is_integer (node.native ());
} }
template <> template <>
bool bool
is_integer (const json::tree::node &node) { is_integer (const json::tree::node &node)
{
return node.is_number () && return node.is_number () &&
is_integer (node.as_number ()); is_integer (node.as_number ());
} }
@ -257,11 +214,110 @@ is_integer (const json::tree::node &node) {
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Node // Node
// Forward declaration of the single-node builder. The array and object
// builders below call it for each element/member, and it calls them back for
// nested containers, so it has to be visible before their definitions.
static std::vector<json::flat::item>::const_iterator
parse (std::vector<json::flat::item>::const_iterator first,
std::vector<json::flat::item>::const_iterator last,
std::unique_ptr<json::tree::node> &output);
//-----------------------------------------------------------------------------
// Fill `parent` with the elements of an array.
//
// `first` must refer to the item directly after the ARRAY_BEGIN marker.
// Elements are built one at a time and appended until the matching ARRAY_END
// is found. Returns the position just past that ARRAY_END.
//
// Exhausting the range without meeting ARRAY_END is treated as impossible.
static std::vector<json::flat::item>::const_iterator
parse (std::vector<json::flat::item>::const_iterator first,
       std::vector<json::flat::item>::const_iterator last,
       json::tree::array &parent)
{
    auto here = first;

    while (here != last && here->tag != json::flat::type::ARRAY_END) {
        std::unique_ptr<json::tree::node> element;
        here = ::parse (here, last, element);
        parent.insert (std::move (element));
    }

    // Stopped on the terminator: step over it for the caller.
    if (here != last)
        return here + 1;

    unreachable ();
}
//-----------------------------------------------------------------------------
// Fill `parent` with the members of an object.
//
// `first` must refer to the item directly after the OBJECT_BEGIN marker.
// Members arrive as a STRING key item followed by the item(s) of the value.
// Returns the position just past the matching OBJECT_END.
//
// Exhausting the range without meeting OBJECT_END is treated as impossible.
static std::vector<json::flat::item>::const_iterator
parse (std::vector<json::flat::item>::const_iterator first,
       std::vector<json::flat::item>::const_iterator last,
       json::tree::object &parent)
{
    auto here = first;

    while (here != last && here->tag != json::flat::type::OBJECT_END) {
        CHECK_EQ (here->tag, json::flat::type::STRING);

        // The key's span includes its quotes; trim one character per side.
        std::string name (here->first + 1, here->last - 1);
        ++here;

        std::unique_ptr<json::tree::node> member;
        here = ::parse (here, last, member);

        parent.insert (name, std::move (member));
    }

    // Stopped on the terminator: step over it for the caller.
    if (here != last)
        return here + 1;

    unreachable ();
}
//-----------------------------------------------------------------------------
// Build one tree node from the flat item at `first`.
//
// Scalars consume exactly one item. ARRAY_BEGIN and OBJECT_BEGIN hand off to
// the container builders, which consume everything up to and including the
// matching end marker.
//
// first:  item to convert; must not equal `last`.
// last:   end of the item sequence.
// output: receives the new node; must be empty on entry.
//
// Returns the position of the first item not consumed.
static std::vector<json::flat::item>::const_iterator
parse (std::vector<json::flat::item>::const_iterator first,
       std::vector<json::flat::item>::const_iterator last,
       std::unique_ptr<json::tree::node> &output)
{
    CHECK (first != last);
    CHECK (output.get () == nullptr);

    switch (first->tag) {
    case json::flat::type::NUL:
        output = std::make_unique<json::tree::null> ();
        return first + 1;

    case json::flat::type::BOOLEAN:
        // Only the literals "true" and "false" are tagged BOOLEAN, so the
        // first character alone identifies the value.
        CHECK (*first->first == 't' || *first->first == 'f');
        output = std::make_unique<json::tree::boolean> (*first->first == 't');
        return first + 1;

    case json::flat::type::STRING:
        CHECK_NEQ (first->first, first->last);
        // The span includes both quotes; trim one character per side.
        output = std::make_unique<json::tree::string> (first->first + 1,
                                                       first->last  - 1);
        return first + 1;

    case json::flat::type::INTEGER:
    case json::flat::type::REAL: {
        // The item points into the caller's buffer, which is not required to
        // be NUL terminated (e.g. a mapped file). Convert a bounded copy so
        // the scan cannot run past `last` when the number ends the input.
        const std::string text (first->first, first->last);
        output = std::make_unique<json::tree::number> (
            std::strtod (text.c_str (), nullptr)
        );
        return first + 1;
    }

    case json::flat::type::ARRAY_BEGIN: {
        auto value  = std::make_unique<json::tree::array> ();
        auto cursor = ::parse (first + 1, last, *value);
        output = std::move (value);
        return cursor;
    }

    case json::flat::type::OBJECT_BEGIN: {
        auto value  = std::make_unique<json::tree::object> ();
        auto cursor = ::parse (first + 1, last, *value);
        output = std::move (value);
        return cursor;
    }

    default:
        // UNKNOWN and the *_END markers never begin a value.
        unreachable ();
    }
}
//-----------------------------------------------------------------------------
std::unique_ptr<json::tree::node> std::unique_ptr<json::tree::node>
json::tree::parse (const boost::filesystem::path &path) { json::tree::parse (const boost::filesystem::path &path)
auto data = slurp (path); {
return parse (static_cast <const char *> (data.get ())); util::mapped_file f (path);
return parse ((const char*)f.cbegin (), (const char*)f.cend ());
} }
@ -269,32 +325,6 @@ std::unique_ptr<json::tree::node>
json::tree::parse (const std::string &path) json::tree::parse (const std::string &path)
{ return parse (path.c_str (), path.c_str () + path.size ()); } { return parse (path.c_str (), path.c_str () + path.size ()); }
// Build a node tree directly from the JSON text in [start, stop) by running
// the Ragel machine whose actions allocate tree nodes as they go.
//
// NOTE(review): this appears to be the implementation on the removed side of
// the diff (the hunk shrinks from 32 to 6 lines); the file also contains a
// (first, last) overload with the same parameter types -- confirm that only
// one of the two is meant to survive.
//
// Throws parse_error, naming the character offset reached, when the machine
// reports failure.
std::unique_ptr<json::tree::node>
json::tree::parse (const char *start,
const char *stop) {
// Cleared by the machine's failure action.
bool __success = true;
// Assigned by the postpop hook each time a container is completed.
json::tree::node *__root = nullptr;
// Ragel call-stack depth and current state.
size_t top = 0;
int cs;
// fsmstack backs fcall/fret; nodestack tracks the containers being built.
deque <int> fsmstack;
deque <parse_context> nodestack;
// Ragel's cursor, end-of-buffer and end-of-input pointers.
const char *p = start,
*pe = stop,
*eof = stop;
%%write init;
%%write exec;
if (!__success) {
std::ostringstream os;
os << "unable to parse json at char " << (p - start);
throw parse_error (os.str ());
}
// Ownership of the raw root pointer passes to the caller here.
return std::unique_ptr<json::tree::node> (__root);
}
std::unique_ptr<json::tree::node> std::unique_ptr<json::tree::node>
json::tree::parse (const char *start) json::tree::parse (const char *start)
@ -306,6 +336,20 @@ json::tree::write (const json::tree::node &node, std::ostream &os)
{ node.write (os); } { node.write (os); }
// Build a node tree from the JSON text in [first, last).
//
// The text is first tokenised into flat items, and the tree is then
// assembled from those items. Any exception raised by the tokeniser
// propagates to the caller unchanged.
std::unique_ptr<json::tree::node>
json::tree::parse (const char *first, const char *last)
{
    std::unique_ptr<json::tree::node> root;

    auto items = json::flat::parse (first, last);
    auto consumed = ::parse (items.cbegin (), items.cend (), root);

    // A well-formed document is a single value, so building it must use up
    // every item. The cast keeps builds without CHECK free of warnings.
    CHECK (consumed == items.cend ());
    (void)consumed;

    return root;
}
//----------------------------------------------------------------------------- //-----------------------------------------------------------------------------
// Type conversion // Type conversion
@ -605,3 +649,27 @@ namespace json { namespace tree {
return std::unique_ptr<node> (new number (f)); return std::unique_ptr<node> (new number (f));
} }
} } } }
//-----------------------------------------------------------------------------
// Write the enumerator name of a flat item tag to `os`.
//
// Every declared enumerator is handled, including UNKNOWN, which the
// tokeniser assigns to an item before its real tag is known; a value outside
// the enumeration is treated as impossible.
//
// Returns `os` to allow chaining.
std::ostream&
operator<< (std::ostream &os, json::flat::type t)
{
    switch (t) {
    case json::flat::type::UNKNOWN:         os << "UNKNOWN";        break;
    case json::flat::type::STRING:          os << "STRING";         break;
    case json::flat::type::NUL:             os << "NUL";            break;
    case json::flat::type::BOOLEAN:         os << "BOOLEAN";        break;
    case json::flat::type::INTEGER:         os << "INTEGER";        break;
    case json::flat::type::REAL:            os << "REAL";           break;
    case json::flat::type::OBJECT_BEGIN:    os << "OBJECT_BEGIN";   break;
    case json::flat::type::OBJECT_END:      os << "OBJECT_END";     break;
    case json::flat::type::ARRAY_BEGIN:     os << "ARRAY_BEGIN";    break;
    case json::flat::type::ARRAY_END:       os << "ARRAY_END";      break;

    default:
        unreachable ();
    }

    return os;
}

View File

@ -14,7 +14,7 @@
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public License
* along with libgim. If not, see <http://www.gnu.org/licenses/>. * along with libgim. If not, see <http://www.gnu.org/licenses/>.
* *
* Copyright 2010-2012 Danny Robson <danny@nerdcruft.net> * Copyright 2010-2015 Danny Robson <danny@nerdcruft.net>
*/ */
#ifndef __UTIL_JSON_HPP #ifndef __UTIL_JSON_HPP
@ -287,6 +287,62 @@ namespace json {
static T deserialise (const json::tree::node&); static T deserialise (const json::tree::node&);
}; };
} }
namespace flat {
    // Kind of a flat (token-level) item.
    enum class type {
        UNKNOWN,        // placeholder until a tag action classifies the item
        NUL,
        BOOLEAN,
        STRING,
        INTEGER,
        REAL,

        OBJECT_BEGIN,
        OBJECT_END,
        ARRAY_BEGIN,
        ARRAY_END
    };

    // One token of a JSON document. `first` and `last` are raw pointers into
    // the text that was parsed; the item does not own that text, so the
    // buffer must stay alive for as long as the item is used.
    struct item {
        type tag;
        const char *first;
        const char *last;

        // Convert the item to a native value of type T.
        template <typename T>
        T as (void) const;
    };

    // Tokenise a JSON document given as a pointer range, a C string, or the
    // path of a file.
    std::vector<item> parse (const char *first, const char *last);
    std::vector<item> parse (const char *first);
    std::vector<item> parse (const boost::filesystem::path&);

    //-----------------------------------------------------------------------------
    // Base class for errors reported by the flat interface.
    //
    // Messages are taken by const reference: std::runtime_error copies the
    // text regardless, and this accepts lvalues as well as temporaries.
    struct error : public std::runtime_error {
        error (const std::string &_what):
            runtime_error (_what)
        { ; }
    };

    // The input text is not valid JSON; `line` is the line counter value
    // supplied by the code that raised the error.
    struct parse_error : public error {
        parse_error (size_t _line, const std::string &_what):
            error (_what),
            line  (_line)
        { ; }

        size_t line;
    };

    // An error concerning a value rather than the document syntax.
    struct value_error : public error {
        value_error (const std::string &_what):
            error (_what)
        { ; }
    };
}
} }
template <typename T, class ...Args> template <typename T, class ...Args>

View File

@ -23,8 +23,8 @@ main (int argc, char ** argv) {
} }
try { try {
json::tree::parse (boost::filesystem::path (argv[ARG_PATH])); json::flat::parse (boost::filesystem::path (argv[ARG_PATH]));
} catch (json::tree::error &x) { } catch (std::exception &x) {
std::cerr << "Error: " << x.what () << std::endl; std::cerr << "Error: " << x.what () << std::endl;
return EXIT_FAILURE; return EXIT_FAILURE;
} }