libcruft-crypto/hash/blake.cpp

/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Copyright 2018 Danny Robson <danny@nerdcruft.net>
 */

#include "blake.hpp"

#include <cruft/util/bitwise.hpp>
#include <cruft/util/cast.hpp>
#include <cruft/util/endian.hpp>
#include <cruft/util/view.hpp>

#include <algorithm>
#include <array>
#include <iterator>
#include <stdexcept>

#include <cstdint>
#include <cstring>

using cruft::crypto::hash::blake;
using cruft::crypto::hash::detail::blake::traits;


///////////////////////////////////////////////////////////////////////////////
// the last six rows are repeats of the first six rows. this allows us to cut
// out a pretty frequent modulus operation.
static constexpr
int
    permute[16][16] = {
    {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, },
    { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3, },
    { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4, },
    {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8, },
    {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13, },
    {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9, },
    { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11, },
    { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10, },
    {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5, },
    { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13,  0, },

    {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, },
    { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3, },
    { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4, },
    {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8, },
    {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13, },
    {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9, },
};


///////////////////////////////////////////////////////////////////////////////
template <int width>
void
G (int i,
   int r,
   typename traits<width>::word_t &a,
   typename traits<width>::word_t &b,
   typename traits<width>::word_t &c,
   typename traits<width>::word_t &d,
   const typename traits<width>::word_t m[]
) {
    const auto j = permute[r][2 * i    ];
    const auto k = permute[r][2 * i + 1];

    a = a + b + (m[j] ^ traits<width>::pi[k]); d = cruft::rotater (d ^ a, traits<width>::rotations[0]);
    c = c + d;                                 b = cruft::rotater (b ^ c, traits<width>::rotations[1]);
    a = a + b + (m[k] ^ traits<width>::pi[j]); d = cruft::rotater (d ^ a, traits<width>::rotations[2]);
    c = c + d;                                 b = cruft::rotater (b ^ c, traits<width>::rotations[3]);
}


///////////////////////////////////////////////////////////////////////////////
template <int width>
std::array<typename traits<width>::word_t,8>
compress (
    std::array<typename traits<width>::word_t,8> h,
    const typename traits<width>::word_t m[16],
    const std::array<typename traits<width>::word_t,4> s,
    u64 t
) {
    typename traits<width>::word_t t0 =  t         & 0xffffffff;
    typename traits<width>::word_t t1 = (t >> 32u) & 0xffffffff;

    typename traits<width>::word_t v[16] = {
        h[0], h[1], h[2], h[3],
        h[4], h[5], h[6], h[7],

        s[0] ^ traits<width>::pi[0],
        s[1] ^ traits<width>::pi[1],
        s[2] ^ traits<width>::pi[2],
        s[3] ^ traits<width>::pi[3],

        t0 ^ traits<width>::pi[4],
        t0 ^ traits<width>::pi[5],
        t1 ^ traits<width>::pi[6],
        t1 ^ traits<width>::pi[7],
    };

    for (int r = 0; r < traits<width>::rounds; ++r) {
        G<width> (0, r, v[ 0], v[ 4], v[ 8], v[12], m);
        G<width> (1, r, v[ 1], v[ 5], v[ 9], v[13], m);
        G<width> (2, r, v[ 2], v[ 6], v[10], v[14], m);
        G<width> (3, r, v[ 3], v[ 7], v[11], v[15], m);

        G<width> (4, r, v[ 0], v[ 5], v[10], v[15], m);
        G<width> (5, r, v[ 1], v[ 6], v[11], v[12], m);
        G<width> (6, r, v[ 2], v[ 7], v[ 8], v[13], m);
        G<width> (7, r, v[ 3], v[ 4], v[ 9], v[14], m);
    }

    for (int i = 0; i < 8; ++i)
        h[i] = h[i] ^ s[i % 4] ^ v[i] ^ v[8 + i];

    return h;
}


///////////////////////////////////////////////////////////////////////////////
template <int width>
typename blake<width>::digest_t
blake<width>::operator() (
    cruft::view<const u08*> data,
    cruft::view<const u08*> salt
) const {
    std::array<typename detail::blake::traits<width>::word_t, 4> fwd {};

    if (salt.size () > sizeof (fwd))
        throw std::invalid_argument ("oversized salt");

    memcpy (&fwd, salt.begin (), salt.size ());
    return (*this) (data, fwd);
}

//-----------------------------------------------------------------------------
template <int width>
typename blake<width>::digest_t
blake<width>::operator() (
    cruft::view<const u08 *> data,
    const std::array<typename detail::blake::traits<width>::word_t, 4> salt
) const noexcept {
    auto h = detail::blake::traits<width>::iv;

    // bounce the message data through d08/dw so we can perform endian
    // conversion.
    //
    // however: this should probably be done in the compression function
    // instead, because it may be possible to optimise that implementation
    // more than simple calls to hton would allow.
    union {
        word_t dw[16];
        u08    d08[16*sizeof(word_t)];
    };

    u64 t = 0;
    auto cursor = data.cbegin ();

    // perform the simple case where we're consuming whole blocks
    for (auto last = data.cend ();
         cruft::cast::sign<size_t> (last - cursor) >= sizeof (dw);
         cursor += sizeof (dw))
    {
        // use the number of bits as the size
        t+= block_size * 8;

        memcpy (d08, cursor, sizeof (d08));
        std::transform (
            std::cbegin (dw),
            std::cend   (dw),
            std::begin  (dw),
            cruft::ntoh<word_t>
        );
        h = compress<width> (h, dw, salt, t);
    }

    // perform the messsage padding.
    //
    // * drain the buffer. this is guaranteed to fit into the bounce buffer.
    // * always append a 1 bit
    // * append enough 0 bits to give block_size-8 bytes
    // * set the last bit as 1
    // * append the two halves of the timer
    // * hash again
    //
    // if we need more space for padding then rehash
    // if at any point no message bits contributed then pass a zero counter
    {
        auto tail = std::copy (cursor, data.cend (), d08);
        t += (data.cend () - cursor) * 8;
        *tail = 0x80;
        bool empty = cursor == data.cend ();

        const auto last = std::end (d08) - 8 - 1;
        // we're _just_ within the space limits. set the high bit in place.
        if (tail == last) {
            *tail++ |= 0x01;
        // we're going to overflow
        } else if (tail > last) {
            std::fill (tail + 1, std::end (d08), 0);
            std::transform (
                std::cbegin (dw),
                std::cend   (dw),
                std::begin  (dw),
                cruft::ntoh<word_t>
            );
            h = compress<width> (h, dw, salt, t);

            empty = true;
            tail = last;
            std::fill (std::begin (d08), tail, 0);
            *tail++ = 0x01;
        // the simple case of appending zeros and a one
        } else {
            std::fill (tail+1, last, 0);
            tail = last;
            *tail++ = 0x01;
        }

        dw[14] = t>>32;
        dw[15] = t&0xffffffff;

        std::transform (
            std::cbegin (dw),
            std::cend   (dw) - 2,
            std::begin  (dw),
            cruft::ntoh<word_t>
        );
        h = compress<width> (h, dw, salt, empty ? 0 : t);
    }

    std::transform (std::cbegin (h), std::cend (h), std::begin (h), cruft::hton<word_t>);
    digest_t d;
    memcpy (d.data (), h.data (), sizeof (d));
    return d;
}


///////////////////////////////////////////////////////////////////////////////
// explicitly instantiate the 256 and 512 widths so that the member
// definitions in this translation unit are emitted for them.
template class cruft::crypto::hash::blake<256>;
template class cruft::crypto::hash::blake<512>;