From d64e3d244ee9b8be91e085e59714a70711bb2954 Mon Sep 17 00:00:00 2001 From: Danny Robson Date: Tue, 3 Oct 2017 17:48:27 +1100 Subject: [PATCH] utf8: clearer comments --- utf8.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/utf8.cpp b/utf8.cpp index 9d18c334..73468ad1 100644 --- a/utf8.cpp +++ b/utf8.cpp @@ -107,10 +107,9 @@ decode (util::view src, OutputT dst) codepoint_t accum { PREFIX[len].value (c) }; - // check every following data byte has the appropriate prefix - static constexpr auto CONTINUATION = "0b10xxxxxx"_test; - + // prepend each of the remaining bytes' data to an accumulator for (int i = 1; i <= len; ++i) { + static constexpr auto CONTINUATION = "0b10xxxxxx"_test; if (cursor == src.cend ()) throw malformed_error {}; @@ -122,8 +121,11 @@ decode (util::view src, OutputT dst) accum |= CONTINUATION.value (now); } - // describes the bits required to be present for a valid minimally - // sized codepoint of a given byte length. + // check that the codepoint is the right size by seeing if the unique + // bits present in a codepoint of the decoded size are actually used. + // + // these could theoretically be provided to the user, but they may be + // misused so we will throw an error instead. static constexpr codepoint_t LEVEL_MASK[] { 0b00000000'00000000'01111111,