From 2af828e82dd237b28cb8838b1665c42ae5dd3794 Mon Sep 17 00:00:00 2001
From: Danny Robson
Date: Mon, 1 Jan 2018 15:47:41 +1100
Subject: [PATCH] time: add iso8601 parsing

---
 CMakeLists.txt        |   6 +-
 test/time/8601.cpp    |  63 ++++++++++
 time/parse.hpp        |  42 +++++++
 time/parse8601.cpp.rl | 228 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 338 insertions(+), 1 deletion(-)
 create mode 100644 test/time/8601.cpp
 create mode 100644 time/parse.hpp
 create mode 100644 time/parse8601.cpp.rl

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 20729385..d74a97d5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,6 +29,7 @@ endif()
 RAGEL_TARGET(json-flat json/flat.cpp.rl ${CMAKE_CURRENT_BINARY_DIR}/json/flat.cpp COMPILE_FLAGS -G2)
 RAGEL_TARGET(uri uri.cpp.rl ${CMAKE_CURRENT_BINARY_DIR}/uri.cpp COMPILE_FLAGS -G2)
 RAGEL_TARGET(version version.cpp.rl ${CMAKE_CURRENT_BINARY_DIR}/version.cpp)
+RAGEL_TARGET(parse8601 time/parse8601.cpp.rl ${CMAKE_CURRENT_BINARY_DIR}/time/parse8601.cpp)
 
 
 ###############################################################################
@@ -400,6 +401,8 @@ list (
     term.hpp
     time.cpp
     time.hpp
+    time/parse.hpp
+    time/parse8601.cpp
     tuple.cpp
     tuple.hpp
     typeidx.cpp
@@ -550,8 +553,9 @@ if (TESTS)
         string
         stringid
         strongdef
-        tuple
+        time/8601
         traits
+        tuple
         typeidx
         uri
         utf8
diff --git a/test/time/8601.cpp b/test/time/8601.cpp
new file mode 100644
index 00000000..465082bc
--- /dev/null
+++ b/test/time/8601.cpp
@@ -0,0 +1,63 @@
+#include "tap.hpp"
+
+#include "time/parse.hpp"
+
+#include <stdexcept>
+
+
+int
+main (int, char**)
+{
+    util::TAP::logger tap;
+
+    // malformed text and out-of-range fields must both be rejected
+    static const char* BAD[] = {
+        "",
+        "foo",
+        "1970-00-01T00:00:00Z",
+        "1970-13-01T00:00:00Z",
+        "1970-01-00T00:00:00Z",
+        "1970-01-01T24:00:00Z",
+    };
+
+    for (const auto &t: BAD) {
+        tap.expect_throw<std::invalid_argument> (
+            [t] () { util::time::iso8601::parse (t); },
+            "invalid timestamp '%s'", t
+        );
+    }
+
+    using namespace std::chrono_literals;
+
+    static const struct {
+        const char *string;
+        std::chrono::nanoseconds value;
+        const char *message;
+    } DECODE[] = {
+        { "1970-01-01T00:00:00Z",            0s,             "UNIX epoch" },
+        { "1970-01-01T01:01:00+01:01",       0s,             "UNIX epoch, +offset" },
+        { "1969-12-31T23:59:59Z",            -1s,            "one second before the epoch" },
+        { "1965-01-01T00:00:00Z",            -157766400s,    "pre-epoch, leap year in between" },
+        { "1985-04-12T23:20:50.52Z",         482196050520ms, "fractional UTC time" },
+        { "1970-01-01T00:00:00.05Z",         50ms,           "fraction with a leading zero" },
+        { "1970-01-01T00:00:00.0000000019Z", 1ns,            "fraction beyond nanoseconds" },
+        { "2000-02-28T01:00:00Z",            951699600s,     "leap year, pre-29th" },
+        { "2000-02-29T01:00:00Z",            951786000s,     "leap year, on-29th" },
+        { "2000-03-01T01:00:00Z",            951872400s,     "leap year, post-29th" },
+        { "1990-12-31T23:59:60Z",            662688000s,     "1990 leap second in UTC" },
+        { "1990-12-31T15:59:60-08:00",       662688000s,     "1990 leap second in PST" },
+    };
+
+    for (const auto &t: DECODE) {
+        auto val = util::time::iso8601::parse (t.string);
+        tap.expect_eq (val, t.value, "%s", t.message);
+    }
+
+    tap.expect_eq (
+        util::time::iso8601::parse ("1996-12-19T16:39:57-08:00"),
+        util::time::iso8601::parse ("1996-12-20T00:39:57Z"),
+        "timezone equivalence"
+    );
+
+    return tap.status ();
+}
diff --git a/time/parse.hpp b/time/parse.hpp
new file mode 100644
index 00000000..75cfa4d0
--- /dev/null
+++ b/time/parse.hpp
@@ -0,0 +1,42 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2017 Danny Robson
+ */
+
+#ifndef CRUFT_UTIL_TIME_PARSE_HPP
+#define CRUFT_UTIL_TIME_PARSE_HPP
+
+#include "../view.hpp"
+
+#include <chrono>
+#include <cstdint>
+
+namespace util::time::iso8601 {
+    /// parse ISO8601 formatted datetime strings
+    ///
+    /// the accepted grammar is the RFC 3339 profile:
+    /// `YYYY-MM-DDThh:mm:ss[.fraction](Z|+hh:mm|-hh:mm)`.
+    ///
+    /// returns nanoseconds since the UNIX epoch (excluding leap seconds).
+    ///
+    /// recognises fractional seconds down to nanosecond resolution; any
+    /// further fractional digits are truncated.
+    ///
+    /// throws std::invalid_argument on improperly formatted strings or
+    /// out-of-range fields.
+    std::chrono::nanoseconds
+    parse (util::view<const char*>);
+}
+
+#endif
diff --git a/time/parse8601.cpp.rl b/time/parse8601.cpp.rl
new file mode 100644
index 00000000..3c15aecb
--- /dev/null
+++ b/time/parse8601.cpp.rl
@@ -0,0 +1,228 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2017 Danny Robson
+ */
+
+#include "time/parse.hpp"
+#include "posix/except.hpp"
+
+#include <chrono>
+#include <cstdint>
+#include <cstring>
+#include <ctime>
+#include <ostream>
+#include <stdexcept>
+
+
+///////////////////////////////////////////////////////////////////////////////
+%%{
+    # based off rfc3339 rather than iso8601 because the former is public
+
+    machine iso8601;
+
+    date_fullyear = digit{4};
+    date_month    = digit{2};
+    date_mday     = digit{2};
+
+    time_hour   = digit{2};
+    time_minute = digit{2};
+    time_second = digit{2};
+
+    # keep at most nanosecond precision and count the digits consumed so
+    # that leading zeros keep their place value; excess digits are dropped.
+    time_secfrac = '.' digit{1,} ${
+        if (frac_digits < NANO_DIGITS) {
+            frac *= 10;
+            frac += fc - '0';
+            ++frac_digits;
+        }
+    };
+
+    time_numoffset = ('+' %{dir=1;} | '-' %{dir=-1;})
+        time_hour ${ offset.tm_hour *= 10; offset.tm_hour += fc - '0'; }
+        ':' time_minute ${ offset.tm_min *= 10; offset.tm_min += fc - '0'; };
+    time_offset = 'Z' | time_numoffset;
+
+    partial_time = time_hour ${ parts.tm_hour *= 10; parts.tm_hour += fc - '0'; }
+        ':' time_minute ${ parts.tm_min *= 10; parts.tm_min += fc - '0'; }
+        ':' time_second ${ parts.tm_sec *= 10; parts.tm_sec += fc - '0'; }
+        time_secfrac?;
+
+    full_date = date_fullyear ${ parts.tm_year *= 10; parts.tm_year += fc - '0'; }
+        '-' date_month ${ parts.tm_mon *= 10; parts.tm_mon += fc - '0'; }
+        '-' date_mday ${ parts.tm_mday *= 10; parts.tm_mday += fc - '0'; };
+
+    full_time = partial_time time_offset;
+
+    date_time := (
+        full_date 'T' full_time
+    )
+    >{ success = false; }
+    %{ success = true; };
+
+    write data;
+}%%
+
+
+///////////////////////////////////////////////////////////////////////////////
+/// range check the fields of a broken-down time.
+///
+/// tm_sec may be 60 so that leap seconds are representable. tm_year is not
+/// tested.
+template <>
+bool
+util::debug::validator<tm>::is_valid (const tm &val) noexcept
+{
+    // we don't test tm_year anywhere here because there isn't a valid range
+    // for years, only that they are expressed as offsets from 1900;
+    return val.tm_sec  >= 0 && val.tm_sec  <= 60 &&
+           val.tm_min  >= 0 && val.tm_min  <  60 &&
+           val.tm_hour >= 0 && val.tm_hour <  24 &&
+           val.tm_mday >  0 && val.tm_mday <= 31 &&
+           val.tm_mon  >= 0 && val.tm_mon  <  12 &&
+           val.tm_wday >= 0 && val.tm_wday <   7 &&
+           val.tm_yday >= 0 && val.tm_yday <= 365;
+}
+
+
+//-----------------------------------------------------------------------------
+/// placeholder stream output for `tm`; always writes the literal "{}".
+std::ostream&
+operator<< (std::ostream &os, const tm&)
+{
+    return os << "{}";
+}
+
+
+//-----------------------------------------------------------------------------
+/// days from 1970-01-01 to the given proleptic Gregorian date; negative for
+/// dates before the epoch.
+///
+/// `month` and `day` are 1-based. the year is reduced with floor division so
+/// that the leap day count is also correct for years before the epoch.
+static constexpr std::int64_t
+days_from_civil (std::int64_t year, int month, int day) noexcept
+{
+    // count years from March so that the leap day is the last day of a year
+    if (month <= 2)
+        --year;
+
+    const std::int64_t era         = (year >= 0 ? year : year - 399) / 400;
+    const std::int64_t year_of_era = year - era * 400;
+    const std::int64_t day_of_year = (153 * (month > 2 ? month - 3 : month + 9) + 2) / 5 + day - 1;
+    const std::int64_t day_of_era  = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
+
+    // 146097 days per 400 year cycle; 719468 days from 0000-03-01 to the epoch
+    return era * 146097 + day_of_era - 719468;
+}
+
+
+//-----------------------------------------------------------------------------
+/// convert a broken-down time into seconds since the UNIX epoch.
+///
+/// tm_year is taken as an offset from 1900 and tm_mon as zero based. one
+/// hour is subtracted when tm_isdst is non-zero.
+std::chrono::seconds
+to_epoch (const tm &t)
+{
+    // TODO: it's assumed the user isn't passing in oddities like 36 months or
+    // similar. in the future we can account for this
+    CHECK_SANITY (t);
+
+    std::int64_t secs = days_from_civil (
+        std::int64_t {1900} + t.tm_year,
+        t.tm_mon % 12 + 1,
+        t.tm_mday
+    );
+
+    // hours
+    secs *= 24;
+    secs += t.tm_hour;
+
+    // minutes
+    secs *= 60;
+    secs += t.tm_min;
+
+    // seconds
+    secs *= 60;
+    secs += t.tm_sec;
+
+    if (t.tm_isdst)
+        secs -= 60 * 60;
+
+    return std::chrono::seconds {secs};
+}
+
+
+//-----------------------------------------------------------------------------
+/// parse an RFC 3339 timestamp into nanoseconds since the UNIX epoch.
+///
+/// throws std::invalid_argument if the text does not match the grammar or
+/// if a field is outside its calendar range. seconds may be 60 so that
+/// leap seconds are accepted.
+std::chrono::nanoseconds
+util::time::iso8601::parse (util::view<const char*> str)
+{
+    // number of decimal digits needed to express nanoseconds
+    constexpr int NANO_DIGITS = 9;
+
+    int cs;
+    const char *p = std::begin (str);
+    const char *pe = std::end (str);
+    const char *eof = pe;
+
+    bool success = false;
+
+    int dir = 0;
+    int frac_digits = 0;
+    std::int64_t frac = 0;
+    struct tm parts, offset;
+    memset (&parts, 0, sizeof (parts));
+    memset (&offset, 0, sizeof (offset));
+
+    %%write init;
+    %%write exec;
+
+    if (!success)
+        throw std::invalid_argument ("invalid date string");
+
+    // the grammar only guarantees digit counts, so reject values that
+    // aren't calendar fields before they reach to_epoch. at this point
+    // tm_mon still holds the 1-based month as written.
+    const bool in_range =
+        parts.tm_mon   >= 1 && parts.tm_mon   <= 12 &&
+        parts.tm_mday  >= 1 && parts.tm_mday  <= 31 &&
+        parts.tm_hour  <= 23 &&
+        parts.tm_min   <= 59 &&
+        parts.tm_sec   <= 60 &&
+        offset.tm_hour <= 23 &&
+        offset.tm_min  <= 59;
+    if (!in_range)
+        throw std::invalid_argument ("invalid date string");
+
+    parts.tm_year -= 1900;
+    parts.tm_mon -= 1;
+
+    // compute the timezone offset
+    std::chrono::seconds diff {
+        dir * (offset.tm_hour * 60 * 60 + offset.tm_min * 60)
+    };
+
+    // fractional part: pad the digits that were read out to nanoseconds
+    for (int i = frac_digits; i < NANO_DIGITS; ++i)
+        frac *= 10;
+
+    // sum the epoch seconds, timezone offset, and fractional components
+    return to_epoch (parts) - diff + std::chrono::nanoseconds (frac);
+}