From 32f32401863bed020c0c7904b3128a4526a19165 Mon Sep 17 00:00:00 2001 From: Danny Robson Date: Thu, 5 Oct 2017 11:44:37 +1100 Subject: [PATCH] json2: initial 'jsonish' support --- json2/event.cpp | 4 + json2/personality/base.cpp | 17 ++- json2/personality/base.hpp | 7 ++ json2/personality/jsonish.cpp | 177 +++++++++++++++++++++++++++ json2/personality/jsonish.hpp | 25 +++- json2/personality/rfc7519.hpp | 16 +++ test/json2/event.cpp | 221 +++++++++++++++++++++++++++++++--- 7 files changed, 444 insertions(+), 23 deletions(-) diff --git a/json2/event.cpp b/json2/event.cpp index 35a7a388..b2170d3b 100644 --- a/json2/event.cpp +++ b/json2/event.cpp @@ -42,7 +42,11 @@ packet::type (void) const noexcept case 'f': return type_t::BOOLEAN; + // TODO: leading plus isn't valid json, but other similar formats support + // this syntax and it's easier to claim it as a number globally here until + // we do a little refactoring. case '-': + case '+': case '0'...'9': return type_t::NUMBER; } diff --git a/json2/personality/base.cpp b/json2/personality/base.cpp index d96b3f92..a8e00d81 100644 --- a/json2/personality/base.cpp +++ b/json2/personality/base.cpp @@ -254,7 +254,7 @@ base::parse_object (const std::function &cb, }; auto parse_member = [] (auto _cb, auto _cursor, auto _last) { - _cursor = parse_string (_cb, _cursor, _last); + _cursor = ParentT::parse_key (_cb, _cursor, _last); _cursor = ParentT::consume_whitespace (_cursor, _last); _cursor = expect (_cursor, _last, ':'); @@ -294,6 +294,7 @@ base::parse_value (const std::function &cb, const char *last) { switch (*first) { + case '+': case '-': case '0'...'9': return ParentT::parse_number (cb, first, last); @@ -309,10 +310,22 @@ base::parse_value (const std::function &cb, case '{': return ParentT::parse_object (cb, first, last); } - throw util::json2::parse_error (first); + return ParentT::parse_unknown (cb, first, last); } +/////////////////////////////////////////////////////////////////////////////// +template +const char* +base::parse_unknown (const std::function&, + const char *first, + const char *last) +{ + (void)last; + throw parse_error {first}; +}; + + //----------------------------------------------------------------------------- #define INSTANTIATE(KLASS) template struct util::json2::personality::base; diff --git a/json2/personality/base.hpp b/json2/personality/base.hpp index 116d9f0f..4e20e14a 100644 --- a/json2/personality/base.hpp +++ b/json2/personality/base.hpp @@ -77,6 +77,13 @@ namespace util::json2::personality { const char *first, const char *last ); + + static const char* + parse_unknown [[noreturn]] ( + const std::function&, + const char *first, + const char *last + ); }; }; diff --git a/json2/personality/jsonish.cpp b/json2/personality/jsonish.cpp index 653645f9..87fd7db9 100644 --- a/json2/personality/jsonish.cpp +++ b/json2/personality/jsonish.cpp @@ -17,8 +17,185 @@ #include "./jsonish.hpp" #include "./base.hpp" +#include "../event.hpp" +#include "../except.hpp" +#include "../../debug.hpp" using util::json2::personality::jsonish; /////////////////////////////////////////////////////////////////////////////// +const char* +jsonish::consume_whitespace (const char *first, const char *last) noexcept +{ + auto cursor = base::consume_whitespace (first, last); + + // consume a comment + if (cursor != last && *cursor == '#') { + while (cursor != last && *cursor != '\n') + ++cursor; + + return consume_whitespace (cursor, last); + } + + return cursor; +} + + +/////////////////////////////////////////////////////////////////////////////// +// format is: +// int: '0x' hex+ | '0' oct+ | '0b' bit+ +// +// float: significand exp? +// significand: digit+ ('.' digit*)? +// exp: [eE] sign? digit+ +// +// number: [+-] (int | float) +const char* +jsonish::parse_number (const std::function &cb, + const char *first, + const char *last) +{ + auto cursor = first; + + if (cursor != last && (*cursor == '+' || *cursor == '-')) + ++cursor; + + if (cursor != last && *cursor == '0') { + ++cursor; + + if (cursor == last) + throw parse_error {cursor}; + + char max = '9'; + switch (*cursor) { + case 'x': { + // parse the hex integer here because we can simplify the + // remaining cases somewhat if we don't need to care about the + // multiple ranges of valid digits. + ++cursor; + + auto digit_start = cursor; + while (cursor != last && ('0' <= *cursor && *cursor <= '9' || + 'a' <= *cursor && *cursor <= 'f' || + 'A' <= *cursor && *cursor <= 'F')) + ++cursor; + if (digit_start == cursor) + throw parse_error {cursor}; + + cb ({first, cursor}); + return cursor; + }; + + case 'b': max = '1'; break; + case '0'...'7': max = '7'; break; + + case '.': + goto frac; + } + + auto digit_start = ++cursor; + while (cursor != last && '0' <= *cursor && *cursor <= max) + ++cursor; + if (digit_start == cursor) + throw parse_error {cursor}; + + cb ({first, cursor}); + return cursor; + } + + while (cursor != last && '0' <= *cursor && *cursor <= '9') + ++cursor; + if (cursor == last) + goto done; + + if (*cursor != '.') + goto exp; + +frac: + ++cursor; + while (cursor != last && *cursor >= '0' && *cursor <= '9') + ++cursor; + if (cursor == last) + goto done; + +exp: + if (cursor != last && (*cursor == 'e' || *cursor == 'E')) { + ++cursor; + + if (cursor != last && (*cursor == '+' || *cursor == '-')) + ++cursor; + + auto digit_start = cursor; + while (cursor != last && '0' <= *cursor && *cursor <= '9') + ++cursor; + if (digit_start == cursor) + throw parse_error {cursor}; + } + + if (first == cursor) + throw parse_error {cursor}; + +done: + cb ({first, cursor}); + return cursor; +} + + +/////////////////////////////////////////////////////////////////////////////// +const char* +jsonish::parse_key (const std::function &cb, + const char *first, + const char *last) +{ + auto cursor = first; + if (cursor == last) + throw parse_error {cursor}; + + // must start with alpha or underscore + switch (*cursor) { + case 'a'...'z': + case 'A'...'Z': + case '_': + ++cursor; + break; + + default: + throw parse_error {cursor}; + } + + + while (cursor != last) { + switch (*cursor) { + case 'a'...'z': + case 'A'...'Z': + case '_': + case '0'...'9': + ++cursor; + break; + + default: + cb ({first, cursor}); + return cursor; + } + } + + cb ({first, cursor}); + return cursor; +} + + +/////////////////////////////////////////////////////////////////////////////// +const char* +jsonish::parse_string (const std::function &cb, + const char *first, + const char *last) +{ + if (first == last) + throw parse_error {first}; + + if (*first == '"') + return base::parse_string (cb, first, last); + else + return parse_key (cb, first, last); +} diff --git a/json2/personality/jsonish.hpp b/json2/personality/jsonish.hpp index 74b16351..fd44f61c 100644 --- a/json2/personality/jsonish.hpp +++ b/json2/personality/jsonish.hpp @@ -27,8 +27,10 @@ namespace util::json2::personality { struct jsonish { static const char* - consume_whitespace [[nodiscard]] (const char *first, const char *last) noexcept - { return base::consume_whitespace (first, last); } + consume_whitespace [[nodiscard]] ( + const char *first, + const char *last + ) noexcept; static const char* @@ -43,7 +45,7 @@ namespace util::json2::personality { const std::function &cb, const char *first, const char *last - ) { return base::parse_number (cb, first, last); } + ); template @@ -61,7 +63,7 @@ namespace util::json2::personality { const std::function &cb, const char *first, const char *last - ) { return base::parse_string (cb, first, last); } + ); static const char* @@ -72,12 +74,27 @@ namespace util::json2::personality { ) { return base::parse_array (cb, first, last); } + static const char* + parse_key [[nodiscard]] ( + const std::function &cb, + const char *first, + const char *last); + + static const char* parse_object [[nodiscard]] ( const std::function &cb, const char *first, const char *last ) { return base::parse_object (cb, first, last); } + + + static const char* + parse_unknown [[nodiscard]] ( + const std::function &cb, + const char *first, + const char *last + ) { return parse_string (cb, first, last); } }; }; diff --git a/json2/personality/rfc7519.hpp b/json2/personality/rfc7519.hpp index 20a71fe9..cf26e0ad 100644 --- a/json2/personality/rfc7519.hpp +++ b/json2/personality/rfc7519.hpp @@ -71,12 +71,28 @@ namespace util::json2::personality { ) { return base::parse_array (cb, first, last); } + static const char* + parse_key [[nodiscard]] ( + const std::function &cb, + const char *first, + const char *last) + { return parse_string (cb, first, last); } + + static const char* parse_object [[nodiscard]] ( const std::function &cb, const char *first, const char *last ) { return base::parse_object (cb, first, last); } + + + static const char * + parse_unknown [[noreturn]] ( + const std::function &cb, + const char *first, + const char *last) + { throw base::parse_unknown (cb, first, last); } }; }; diff --git a/test/json2/event.cpp b/test/json2/event.cpp index c7bc7021..f0159519 100644 --- a/test/json2/event.cpp +++ b/test/json2/event.cpp @@ -7,6 +7,7 @@ #include +/////////////////////////////////////////////////////////////////////////////// void test_numbers (util::TAP::logger &tap) { @@ -18,6 +19,7 @@ test_numbers (util::TAP::logger &tap) { "1", true, "single digit" }, { "01", false, "leading zero" }, { "-1", true, "leading minus" }, + { "+1", false, "leading plus" }, { "1.", false, "truncated fraction" }, { "1.0", true, "fraction" }, { "1.0e", false, "truncated exponential" }, @@ -57,6 +59,7 @@ test_numbers (util::TAP::logger &tap) } +/////////////////////////////////////////////////////////////////////////////// void test_literals (util::TAP::logger &tap) { @@ -103,6 +106,7 @@ test_literals (util::TAP::logger &tap) } +/////////////////////////////////////////////////////////////////////////////// void test_strings (util::TAP::logger &tap) { @@ -155,16 +159,100 @@ test_arrays (util::TAP::logger &tap) std::vector types; const char *message; } TESTS[] = { - { "[]", true, { type_t::ARRAY_BEGIN, type_t::ARRAY_END }, "empty" }, - { "[1]", true, { type_t::ARRAY_BEGIN, type_t::NUMBER, type_t::ARRAY_END }, "single number" }, - { "[1true]", false, { type_t::ARRAY_BEGIN, type_t::NUMBER }, "contatenated number/bool" }, - { "[1,2]", true, { type_t::ARRAY_BEGIN, type_t::NUMBER, type_t::NUMBER, type_t::ARRAY_END }, "two numbers" }, - { "[1,]", false, { type_t::ARRAY_BEGIN, type_t::NUMBER }, "single trailing comma" }, - { "[1,2,]", false, { type_t::ARRAY_BEGIN, type_t::NUMBER, type_t::NUMBER }, "double trailing comma" }, - { "[,]", false, { type_t::ARRAY_BEGIN }, "only comma" }, - { "[", false, { type_t::ARRAY_BEGIN }, "missing terminator" }, - { "[[]]", true, { type_t::ARRAY_BEGIN, type_t::ARRAY_BEGIN, type_t::ARRAY_END, type_t::ARRAY_END }, "nested array" }, - { "[[]", false, { type_t::ARRAY_BEGIN, type_t::ARRAY_END }, "unbalanced nested array" }, + { + "[]", + true, + { + type_t::ARRAY_BEGIN, + type_t::ARRAY_END + }, + "empty" + }, + { + "[1]", + true, + { + type_t::ARRAY_BEGIN, + type_t::NUMBER, + type_t::ARRAY_END + }, + "single number" + }, + { + "[1true]", + false, + { + type_t::ARRAY_BEGIN, + type_t::NUMBER + }, + "contatenated number/bool" + }, + { + "[1,2]", + true, + { + type_t::ARRAY_BEGIN, + type_t::NUMBER, + type_t::NUMBER, + type_t::ARRAY_END + }, + "two numbers" + }, + { + "[1,]", + false, + { + type_t::ARRAY_BEGIN, + type_t::NUMBER + }, + "single trailing comma" + }, + { + "[1,2,]", + false, + { + type_t::ARRAY_BEGIN, + type_t::NUMBER, + type_t::NUMBER + }, + "double trailing comma" + }, + { + "[,]", + false, + { + type_t::ARRAY_BEGIN + }, + "only comma" + }, + { + "[", + false, + { + type_t::ARRAY_BEGIN + }, + "missing terminator" + }, + { + "[[]]", + true, + { + type_t::ARRAY_BEGIN, + type_t::ARRAY_BEGIN, + type_t::ARRAY_END, + type_t::ARRAY_END + }, + "nested array" + }, + { + "[[]", + false, + { + type_t::ARRAY_BEGIN, + type_t::ARRAY_END + }, + "unbalanced nested array" + }, }; for (const auto &t: TESTS) { @@ -199,12 +287,65 @@ test_objects (util::TAP::logger &tap) std::vector strings; const char *message; } TESTS[] = { - { "{}", true, { type_t::OBJECT_BEGIN, type_t::OBJECT_END }, {}, "empty" }, - { "{", false, { type_t::OBJECT_BEGIN }, {}, "missing terminator" }, - { "{\"a\":1}", true, { type_t::OBJECT_BEGIN, type_t::STRING, type_t::NUMBER, type_t::OBJECT_END }, {"\"a\""}, "empty" }, - { "{1:1}", false, { type_t::OBJECT_BEGIN }, {}, "integer key" }, - { "{:1}", false, { type_t::OBJECT_BEGIN }, {}, "no key" }, - { "{\"a\":}", false, { type_t::OBJECT_BEGIN, type_t::STRING }, {}, "no value" }, + { + "{}", + true, + { + type_t::OBJECT_BEGIN, + type_t::OBJECT_END + }, + {}, + "empty" + }, + { + "{", + false, + { type_t::OBJECT_BEGIN }, + {}, + "missing terminator" + }, + { + R"json({"a":1})json", + true, + { + type_t::OBJECT_BEGIN, + type_t::STRING, + type_t::NUMBER, + type_t::OBJECT_END + }, + { + "\"a\"" + }, + "empty" + }, + { + "{1:1}", + false, + { + type_t::OBJECT_BEGIN + }, + {}, + "integer key" + }, + { + "{:1}", + false, + { + type_t::OBJECT_BEGIN + }, + {}, + "no key" + }, + { + R"json({"a":})json", + false, + { + type_t::OBJECT_BEGIN, + type_t::STRING + }, + {}, + "no value" + }, { R"json({"a":[]})json", true, @@ -229,7 +370,10 @@ test_objects (util::TAP::logger &tap) type_t::OBJECT_END, type_t::OBJECT_END }, - { "\"a\"", "\"b\"" }, + { + "\"a\"", + "\"b\"" + }, "recursive object" } }; @@ -260,6 +404,47 @@ test_objects (util::TAP::logger &tap) }; +/////////////////////////////////////////////////////////////////////////////// +void +test_jsonish (util::TAP::logger &tap) +{ + static const struct { + const char *data; + const char *message; + } TESTS[] = { + //{ "{}", "empty object" }, + //{ "0xdeadbeef", "hex literal" }, + //{ "0b11011100", "binary literal" }, + //{ "0666", "octal literal" }, + //{ "0.", "float without frac" }, + //{ "+0.", "float with leading +" }, + //{ "-0.", "float with leading -" }, + //{ "string", "bare literal string" }, + //{ "{foo: 1}", "bare string key" }, + //{ "{foo: bar}", "bare string key and value" }, + //{ "{foo: bar,}", "trailing object comma" }, + //{ "[1,]", "trailing array comma" }, + //{ "1 #comment", "trailing comment" }, + { "#comment\n1", "leading comment" }, + }; + + for (const auto &t: TESTS) { + bool success = true; + try { + util::json2::event::parse ( + [] (auto) {}, + t.data, + t.data + strlen (t.data) + ); + } catch (const util::json2::error&) { + success = false; + } + + tap.expect (success, "jsonish, %s", t.message); + } +}; + + /////////////////////////////////////////////////////////////////////////////// int main (void) @@ -272,5 +457,7 @@ main (void) test_arrays (tap); test_objects (tap); + test_jsonish (tap); + return tap.status (); } \ No newline at end of file