diff --git a/json.cpp.rl b/json.cpp.rl index 04006ac5..336e01f9 100644 --- a/json.cpp.rl +++ b/json.cpp.rl @@ -14,7 +14,7 @@ * You should have received a copy of the GNU General Public License * along with libgim. If not, see . * - * Copyright 2010-2012 Danny Robson + * Copyright 2010-2015 Danny Robson */ @@ -41,215 +41,172 @@ using namespace std; using namespace util; //----------------------------------------------------------------------------- -// Parsing - -struct parse_context { - parse_context(json::tree::node *_root): - root (_root), - value (NULL), - key (NULL), - start (NULL), - stop (NULL) - { ; } - - json::tree::node *root, - *value, - *key; - const char *start, - *stop; -}; - - %%{ + # JSON (rfc7159) machine json; - ## Record whether parsing was successful for future use - action success - { __success = true; } + action trace { if (false) std::cerr << *p; } + action success { __success = true; } + action failure { } - action failure { - __success = false; - /*std::cerr << std::endl - << "Failure on: '" << fc << "' in level " << top << " at " << fpc - p - << std::endl; - */ - } + action new_line { ++line; } + + action first { parsed.push_back ({ type::UNKNOWN, p, p}); } + action last { parsed.back ().last = p; } + + action tag_nul { parsed.back ().tag = type::NUL; } + action tag_boolean { parsed.back ().tag = type::BOOLEAN; } + action tag_string { parsed.back ().tag = type::STRING; } + action tag_integer { parsed.back ().tag = type::INTEGER; } + action tag_real { parsed.back ().tag = type::REAL; } + + action tag_object_begin { parsed.push_back ({ type::OBJECT_BEGIN, p, p + 1 }); } + action tag_object_end { parsed.push_back ({ type::OBJECT_END, p, p + 1 }); } + action tag_array_begin { parsed.push_back ({ type::ARRAY_BEGIN, p, p + 1 }); } + action tag_array_end { parsed.push_back ({ type::ARRAY_END, p, p + 1 }); } + + # Line counter + lines = ( + any | '\n' @new_line + )*; + + # UTF-8 (rfc3629) + utf8_tail = 0x80..0xbf; + + utf8_1 = 0x00..0x7f; + utf8_2 = 0xc2..0xdf utf8_tail; + utf8_3 = 0xe0 0xa0..0xbf utf8_tail | + 0xe1..0xec utf8_tail{2} | + 0xed 0x80..0x9f utf8_tail | + 0xee..0xef utf8_tail{2}; + utf8_4 = 0xf0 0x90..0xbf utf8_tail{2} | + 0xf1..0xf3 utf8_tail{3} | + 0xf4 0x80..0x8f utf8_tail{2}; - action new_object { nodestack.push_back (parse_context(new json::tree::object)); } - action new_array { nodestack.push_back (parse_context(new json::tree::array)); } + utf8 = utf8_1 | utf8_2 | utf8_3 | utf8_4; - action new_object_value { - CHECK (nodestack.back ().root->is_object ()); - CHECK (nodestack.back ().key); - CHECK (nodestack.back ().value); + # Utility + ws = 0x20 | 0x09 | 0x0A | 0x0D; + array_start = '['; + array_end = ']'; + object_start = '{'; + object_end = '}'; - if (!nodestack.back ().key->is_string ()) - throw parse_error ("object keys must be strings"); + # Strings + char = + (utf8 - ["\\]) + | "\\" ( + [\\"/bfnrt] + | "u" xdigit{4} + ) + ; - json::tree::object *object = (json::tree::object*)nodestack.back ().root; - object->insert (nodestack.back ().key->as_string (), - unique_ptr (nodestack.back ().value)); - nodestack.back ().key = NULL; - nodestack.back ().value = NULL; - } + string = ('"' char* '"') >first >tag_string %*last; - action new_array_value { - CHECK (nodestack.back ().root->is_array ()); - CHECK (nodestack.back ().value); + # numbers + int = '0' | [1-9] digit*; - json::tree::array *array = (json::tree::array *)nodestack.back ().root; - array->insert (unique_ptr (nodestack.back ().value)); - nodestack.back ().value = NULL; - } + frac = '.' digit+; + e = 'e'i[+\-]?; + exp = e digit+; - action new_string { - CHECK (!nodestack.empty ()); - CHECK (!nodestack.back ().value); + number = ( + '-'? + int + (frac >tag_real)? + exp? + ) >tag_integer; - std::string value (std::string (nodestack.back ().start, - nodestack.back ().stop)); - nodestack.back ().value = new json::tree::string(value); - } + # wrapper types + array = array_start @{ fhold; fcall array_members; } array_end; + object = object_start @{ fhold; fcall object_members; } object_end; - action new_boolean { - CHECK (!nodestack.empty ()); - CHECK (!nodestack.back ().value); + # simple types; case sensitive literals + bool = ("true" | "false") >tag_boolean; + nul = "null" >tag_nul; + literal = bool | nul; - throw parse_error ("unable to parse boolean"); - } + value = object | array | (number | string | literal) >first %last; - action new_number { - CHECK (!nodestack.empty ()); + # Complex + member = string ws* ':' ws* value; - parse_context &back = nodestack.back (); - CHECK (!back.value); - CHECK (back.start); - CHECK (back.stop); - CHECK_LE (back.start, back.stop); + array_members := (( + array_start >tag_array_begin ws* (value ws* (',' ws* value ws*)*)? array_end >tag_array_end + ) & lines) + @{ fhold; fret; } $trace $!failure; - errno = 0; - char *end; - double value = strtod (back.start, &end); - if (end == back.start || errno) - throw parse_error ("unable to parse number"); - back.value = new json::tree::number (value); - } + object_members := (( + object_start >tag_object_begin ws* (member ws* (',' ws* member ws*)*)? object_end >tag_object_end + ) & lines) + @{ fhold; fret; } $trace $!failure; - action new_null { - CHECK (!nodestack.empty ()); - CHECK (!nodestack.back ().value); + # meta types + document := ((ws* value ws*) & lines) + %success + $!failure + $trace; - nodestack.back().value = new json::tree::null (); - } - - action new_object_key { - CHECK (!nodestack.empty ()); - CHECK (nodestack.back ().root->is_object ()); - CHECK (nodestack.back ().value); - CHECK (!nodestack.back ().key); - - nodestack.back ().key = nodestack.back ().value; - nodestack.back ().value = NULL; - } - - prepush { - fsmstack.push_back (0); - } - - postpop { - fsmstack.pop_back (); - __root = nodestack.back ().root; - if (nodestack.size () > 1) - (nodestack.rbegin () + 1)->value = nodestack.back ().root; - nodestack.pop_back (); - } - - variable stack fsmstack; - - alphtype char; - - ## numerical - exp = [eE]('-' | '+')? digit+; - frac = '.' digit+; - int = '-'? [1-9]? digit+; - - number = int ( frac - | exp - | frac exp)?; - - ## textual - char = - any - (cntrl | '\"' | '\\') - | '\\\"' - | '\\\\' - | '\\/' - | '\\b' - | '\\f' - | '\\n' - | '\\r' - | '\\t' - | '\\u' xdigit{4}; - - string = ('"' - char* >{ nodestack.back ().start = fpc; } - %{ nodestack.back ().stop = fpc; }) - '"' - @new_string; - - ## other - boolean = - 'true' @{ nodestack.back ().value = new json::tree::boolean ( true); } - | 'false' @{ nodestack.back ().value = new json::tree::boolean (false); }; - - ## components - object = '{' @{ fhold; fcall _object; } '}'; - array = '[' @{ fhold; fcall _array; } ']'; - - value = - string - | boolean - | number >{ nodestack.back ().start = fpc; } %{ nodestack.back ().stop = fpc; } %new_number - | object - | array - | 'null' %new_null; - - ## compound data types - _array := ('[' @new_array - space* ((value %new_array_value space* ',' space*)* value %new_array_value space*)? - ']') - $!failure - @{ fhold; fret; }; - - pair = string %new_object_key space* ':' space* value %new_object_value; - _object := ('{' @new_object - space* ((pair space* ',' space*)* pair space*)? - '}') - $!failure - @{ fhold; fret; }; - - json := (space* (object | array) space*) - $!failure - %success - >{ __success = false; }; + variable stack ragelstack; + prepush { ragelstack.push_back (0); } + postpop { ragelstack.pop_back (); } write data; }%% +std::ostream& operator<< (std::ostream &os, json::flat::type); + + +//----------------------------------------------------------------------------- +std::vector +json::flat::parse (const char *first, const char *last) +{ + const char *p = first; + const char *pe = last; + const char *eof = pe; + + std::deque ragelstack; + std::vector parsed; + + size_t line = 0; + int cs, top; + bool __success = false; + + %%write init; + %%write exec; + + if (!__success) + throw parse_error (line, "parse error"); + + return parsed; +} + + +std::vector +json::flat::parse (const boost::filesystem::path &path) +{ + util::mapped_file f (path); + return parse ((const char *)f.cbegin (), (const char*)f.cend ()); +} + + //----------------------------------------------------------------------------- // External support template <> bool -is_integer (const json::tree::number &node) { +is_integer (const json::tree::number &node) +{ return is_integer (node.native ()); } template <> bool -is_integer (const json::tree::node &node) { +is_integer (const json::tree::node &node) +{ return node.is_number () && is_integer (node.as_number ()); } @@ -257,11 +214,110 @@ is_integer (const json::tree::node &node) { //----------------------------------------------------------------------------- // Node +static std::vector::const_iterator +parse (std::vector::const_iterator first, + std::vector::const_iterator last, + std::unique_ptr &output); + +//----------------------------------------------------------------------------- +static std::vector::const_iterator +parse (std::vector::const_iterator first, + std::vector::const_iterator last, + json::tree::array &parent) +{ + for (auto cursor = first; cursor != last; ) { + if (cursor->tag == json::flat::type::ARRAY_END) + return cursor + 1; + + std::unique_ptr value; + cursor = ::parse (cursor, last, value); + parent.insert (std::move (value)); + } + + unreachable (); +} + + +//----------------------------------------------------------------------------- +static std::vector::const_iterator +parse (std::vector::const_iterator first, + std::vector::const_iterator last, + json::tree::object &parent) +{ + for (auto cursor = first; cursor != last; ) { + if (cursor->tag == json::flat::type::OBJECT_END) + return cursor + 1; + + CHECK_EQ (cursor->tag, json::flat::type::STRING); + + std::string key (cursor->first + 1, cursor->last - 1); + ++cursor; + + std::unique_ptr val; + cursor = ::parse (cursor, last, val); + + parent.insert (key, std::move (val)); + } + + unreachable (); +} + + +//----------------------------------------------------------------------------- +static std::vector::const_iterator +parse (std::vector::const_iterator first, + std::vector::const_iterator last, + std::unique_ptr &output) +{ + CHECK (first != last); + CHECK (output.get () == nullptr); + + switch (first->tag) { + case json::flat::type::NUL: + output.reset (new json::tree::null ()); + return first + 1; + + case json::flat::type::BOOLEAN: + CHECK (*first->first == 't' || *first->first == 'f'); + output.reset (new json::tree::boolean (*first->first == 't')); + return first + 1; + + case json::flat::type::STRING: + CHECK_NEQ (first->first, first->last); + output.reset (new json::tree::string (first->first + 1, first->last - 1)); + return first + 1; + + case json::flat::type::INTEGER: + case json::flat::type::REAL: + output.reset (new json::tree::number (std::atof (first->first))); + return first + 1; + + case json::flat::type::ARRAY_BEGIN: { + auto value = std::make_unique (); + auto cursor = ::parse (first + 1, last, *value); + output = std::move (value); + return cursor; + } + + case json::flat::type::OBJECT_BEGIN: { + auto value = std::make_unique (); + auto cursor = ::parse (first + 1, last, *value); + output = std::move (value); + return cursor; + } + + default: + unreachable (); + } +} + +//----------------------------------------------------------------------------- std::unique_ptr -json::tree::parse (const boost::filesystem::path &path) { - auto data = slurp (path); - return parse (static_cast (data.get ())); +json::tree::parse (const boost::filesystem::path &path) +{ + util::mapped_file f (path); + return parse ((const char*)f.cbegin (), (const char*)f.cend ()); } @@ -269,32 +325,6 @@ std::unique_ptr json::tree::parse (const std::string &path) { return parse (path.c_str (), path.c_str () + path.size ()); } -std::unique_ptr -json::tree::parse (const char *start, - const char *stop) { - bool __success = true; - json::tree::node *__root = nullptr; - size_t top = 0; - int cs; - deque fsmstack; - deque nodestack; - - const char *p = start, - *pe = stop, - *eof = stop; - - %%write init; - %%write exec; - - if (!__success) { - std::ostringstream os; - os << "unable to parse json at char " << (p - start); - throw parse_error (os.str ()); - } - - return std::unique_ptr (__root); -} - std::unique_ptr json::tree::parse (const char *start) @@ -306,6 +336,20 @@ json::tree::write (const json::tree::node &node, std::ostream &os) { node.write (os); } +std::unique_ptr +json::tree::parse (const char *first, const char *last) +{ + std::unique_ptr output; + auto data = json::flat::parse (first, last); + auto end = ::parse (data.cbegin (), data.cend (), output); + + CHECK (end == data.cend ()); + (void)end; + + return output; +} + + //----------------------------------------------------------------------------- // Type conversion @@ -605,3 +649,27 @@ namespace json { namespace tree { return std::unique_ptr (new number (f)); } } } + + +//----------------------------------------------------------------------------- +std::ostream& +operator<< (std::ostream &os, json::flat::type t) +{ + switch (t) { + case json::flat::type::STRING: os << "STRING"; break; + case json::flat::type::NUL: os << "NUL"; break; + case json::flat::type::BOOLEAN: os << "BOOLEAN"; break; + case json::flat::type::INTEGER: os << "INTEGER"; break; + case json::flat::type::REAL: os << "REAL"; break; + + case json::flat::type::OBJECT_BEGIN: os << "OBJECT_BEGIN"; break; + case json::flat::type::OBJECT_END: os << "OBJECT_END"; break; + case json::flat::type::ARRAY_BEGIN: os << "ARRAY_BEGIN"; break; + case json::flat::type::ARRAY_END: os << "ARRAY_END"; break; + + default: + unreachable (); + } + + return os; +} diff --git a/json.hpp b/json.hpp index 760ed292..a15f995e 100644 --- a/json.hpp +++ b/json.hpp @@ -14,7 +14,7 @@ * You should have received a copy of the GNU General Public License * along with libgim. If not, see . * - * Copyright 2010-2012 Danny Robson + * Copyright 2010-2015 Danny Robson */ #ifndef __UTIL_JSON_HPP @@ -287,6 +287,62 @@ namespace json { static T deserialise (const json::tree::node&); }; } + + namespace flat { + enum class type { + UNKNOWN, + + NUL, + BOOLEAN, + STRING, + INTEGER, + REAL, + + OBJECT_BEGIN, + OBJECT_END, + + ARRAY_BEGIN, + ARRAY_END + }; + + struct item { + type tag; + const char *first; + const char *last; + + template + T as (void) const; + }; + + std::vector parse (const char *first, const char *last); + std::vector parse (const char *first); + std::vector parse (const boost::filesystem::path&); + + //----------------------------------------------------------------------------- + struct error : public std::runtime_error { + error (const std::string &&_what): + runtime_error (std::move (_what)) + { ; } + }; + + + struct parse_error : public error { + parse_error (size_t _line, const std::string &&_what): + error (std::move (_what)), + line (_line) + { ; } + + size_t line; + }; + + + struct value_error : public error { + value_error (const std::string &&_what): + error (std::move (_what)) + { ; } + }; + + } } template diff --git a/tools/json-clean.cpp b/tools/json-clean.cpp index 769d3b92..92934152 100644 --- a/tools/json-clean.cpp +++ b/tools/json-clean.cpp @@ -5,12 +5,12 @@ * terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. - * + * * libgim is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. - * + * * You should have received a copy of the GNU General Public License * along with libgim. If not, see . * diff --git a/tools/json-schema.cpp b/tools/json-schema.cpp index 309804f0..18754fb6 100644 --- a/tools/json-schema.cpp +++ b/tools/json-schema.cpp @@ -5,12 +5,12 @@ * terms of the GNU General Public License as published by the Free Software * Foundation, either version 3 of the License, or (at your option) any later * version. - * + * * libgim is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more * details. - * + * * You should have received a copy of the GNU General Public License * along with libgim. If not, see . * @@ -210,11 +210,11 @@ is_number_valid (const json::tree::number &node, const json::tree::object &schema) { typedef bool (*number_validator_t)(const json::tree::number&, const json::tree::node&); static const map VALIDATORS = { - { "minimum", &is_minimum_valid }, - { "maximum", &is_maximum_valid }, - { "exclusiveMinimum", &is_exclusive_minimum_valid }, - { "exclusiveMaximum", &is_exclusive_maximum_valid }, - { "divisibleBy", &is_divisible_by_valid }, + { "minimum", &is_minimum_valid }, + { "maximum", &is_maximum_valid }, + { "exclusiveMinimum", &is_exclusive_minimum_valid }, + { "exclusiveMaximum", &is_exclusive_maximum_valid }, + { "divisibleBy", &is_divisible_by_valid }, }; for (const auto &i: schema) { @@ -340,7 +340,7 @@ is_unique_items_valid (const json::tree::array &node, if (!constraint.is_boolean ()) throw json::tree::schema_error ("uniqueItems must be a boolean"); - if (node.size () < 2) + if (node.size () < 2) return true; @@ -511,7 +511,7 @@ is_node_valid (const json::tree::node &node, return is_node_valid (node, referenced->as_object ()); } - if (schema.has ("type") && + if (schema.has ("type") && !is_type_valid (node, schema["type"])) { std::cerr << "node type is \"" << type_to_string (node) << "\", expected " << schema["type"] << "\n"; @@ -539,7 +539,7 @@ is_node_valid (const json::tree::node &node, #undef IS_VALID return false; - + /*static const map VALIDATORS ({ { "description", &is_always_valid }, @@ -549,7 +549,7 @@ is_node_valid (const json::tree::node &node, { "type", &is_type_valid }, });*/ - + //"required"; @@ -577,7 +577,7 @@ main (int argc, char **argv) { print_usage (argc, argv); return EXIT_FAILURE; } - + // Load the schema and input unique_ptr schema, input; try { @@ -593,7 +593,7 @@ main (int argc, char **argv) { std::cerr << "Schema should be an object\n"; return EXIT_FAILURE; } - + const json::tree::object &schema_object = schema->as_object (); // Check input is valid diff --git a/tools/json-validate.cpp b/tools/json-validate.cpp index b2c45167..c7f9db8e 100644 --- a/tools/json-validate.cpp +++ b/tools/json-validate.cpp @@ -23,8 +23,8 @@ main (int argc, char ** argv) { } try { - json::tree::parse (boost::filesystem::path (argv[ARG_PATH])); - } catch (json::tree::error &x) { + json::flat::parse (boost::filesystem::path (argv[ARG_PATH])); + } catch (std::exception &x) { std::cerr << "Error: " << x.what () << std::endl; return EXIT_FAILURE; }