// libcruft-util/hash/keccak.cpp

#include "./keccak.hpp"

#include "../endian.hpp"
#include "../maths.hpp"
#include "../bitwise.hpp"

#include <cstdlib>
#include <cstdint>
#include <array>
#include <iostream>
#include <algorithm>

#if 0
// Helpers for the disabled (#if 0) compact implementation below.
// NOTE(review): the types u8, u64 and ui are not declared anywhere in this
// view, so this section cannot compile as-is if re-enabled.

// Loop shorthand: runs the already-declared counter i from 0 up to n-1.
#define FOR(i,n) for(i=0; i<n; ++i)

// Steps the register *R: shifts it left by one bit and XORs in 0x71 when bit
// 0x80 was set before the shift. Returns bit 1 of the updated register.
int LFSR86540(u8 *R) { (*R)=((*R)<<1)^(((*R)&0x80)?0x71:0); return ((*R)&2)>>1; }
// Rotates a (cast to u64) left by o bits. The right shift is by 64-o, so o
// must not be 0.
#define ROL(a,o) ((((u64)a)<<o)^(((u64)a)>>(64-o)))
// Assembles a 64-bit value from 8 bytes at x, with x[0] as the least
// significant byte.
static u64 load64(const u8 *x) { ui i; u64 u=0; FOR(i,8) { u<<=8; u|=x[7-i]; } return u; }
// Writes u to 8 bytes at x, least significant byte first.
static void store64(u8 *x, u64 u) { ui i; FOR(i,8) { x[i]=u; u>>=8; } }
// XORs u into 8 bytes at x, least significant byte first.
static void xor64(u8 *x, u64 u) { ui i; FOR(i,8) { x[i]^=u; u>>=8; } }
// Lane accessors: read, write and XOR the 8-byte lane at index x+5*y of the
// state pointed to by a variable named s in the enclosing scope.
#define rL(x,y) load64((u8*)s+8*(x+5*y))
#define wL(x,y,l) store64((u8*)s+8*(x+5*y),l)
#define XL(x,y,l) xor64((u8*)s+8*(x+5*y),l)
// Runs 24 rounds over the state at s, which is accessed as 25 lanes of 8 bytes
// each through the rL/wL/XL macros. Each round applies the four labelled
// stages in order: θ, combined ρ and π, χ, then ι.
void KeccakF1600(void *s)
{
    // R is the LFSR register that drives the ι stage; it is seeded once and
    // carried across all rounds.
    ui r,x,y,i,j,Y; u8 R=0x01; u64 C[5],D;
    for(i=0; i<24; i++) {
        // θ: C[x] is the XOR of the five lanes with the same x; each lane of
        // column x is then XORed with C[x-1] ^ ROL(C[x+1], 1) (indices mod 5).
        /*θ*/ FOR(x,5) C[x]=rL(x,0)^rL(x,1)^rL(x,2)^rL(x,3)^rL(x,4); FOR(x,5) { D=C[(x+4)%5]^ROL(C[(x+1)%5],1); FOR(y,5) XL(x,y,D); }
        // ρπ: starting from lane (1,0), walks 24 lanes via
        // (x,y) -> (y, (2x+3y)%5), writing the previously held lane rotated
        // by the running sum r (taken mod 64) into each visited position.
        /*ρπ*/ x=1; y=r=0; D=rL(x,y); FOR(j,24) { r+=j+1; Y=(2*x+3*y)%5; x=y; y=Y; C[0]=rL(x,y); wL(x,y,ROL(D,r%64)); D=C[0]; }
        // χ: per row y, each lane becomes C[x] ^ (~C[x+1] & C[x+2]) computed
        // from a copy of the row.
        /*χ*/ FOR(y,5) { FOR(x,5) C[x]=rL(x,y); FOR(x,5) wL(x,y,C[x]^((~C[(x+1)%5])&C[(x+2)%5])); }
        // ι: seven LFSR steps; step j toggles bit (2^j)-1 of lane (0,0) when
        // the LFSR output is non-zero.
        /*ι*/ FOR(j,7) if (LFSR86540(&R)) XL(0,0,(u64)1<<((1<<j)-1));
    }
}
// One-shot sponge over a 200-byte state using KeccakF1600.
//   r       rate in bits; the block size used is R = r/8 bytes
//   c       capacity; not referenced anywhere in the body
//   in      input bytes, inLen of them
//   sfx     suffix byte XORed into the state right after the last input byte
//   out     destination for outLen output bytes
void Keccak(ui r, ui c, const u8 *in, u64 inLen, u8 sfx, u8 *out, u64 outLen)
{
    // Zero the whole state; b tracks how many bytes of the current block are
    // filled.
    /*initialize*/ u8 s[200]; ui R=r/8; ui i,b=0; FOR(i,200) s[i]=0;
    // XOR the input into the state R bytes at a time, permuting after every
    // full block. A trailing partial block leaves b at its length.
    /*absorb*/ while(inLen>0) { b=(inLen<R)?inLen:R; FOR(i,b) s[i]^=in[i]; in+=b; inLen-=b; if (b==R) { KeccakF1600(s); b=0; } }
    // XOR in the suffix at offset b. If the suffix has bit 0x80 set and sits
    // in the last byte of the block, permute first so that the final 0x80 bit
    // lands in a fresh block.
    /*pad*/ s[b]^=sfx; if((sfx&0x80)&&(b==(R-1))) KeccakF1600(s); s[R-1]^=0x80; KeccakF1600(s);
    // Copy out up to R bytes at a time, permuting between output blocks.
    /*squeeze*/ while(outLen>0) { b=(outLen<R)?outLen:R; FOR(i,b) out[i]=s[i]; out+=b; outLen-=b; if(outLen>0) KeccakF1600(s); }
}

#else

// derived from Keccak (KCP) readable-and-compact C implementation

/*
Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
denoted as "the implementer".

For more information, feedback or questions, please refer to our websites:
http://keccak.noekeon.org/
http://keyak.noekeon.org/
http://ketje.noekeon.org/

To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/

/*
================================================================
The purpose of this source file is to demonstrate a readable and compact
implementation of all the Keccak instances approved in the FIPS 202 standard,
including the hash functions and the extendable-output functions (XOFs).

We focused on clarity and on source-code compactness,
rather than on the performance.

The advantages of this implementation are:
    + The source code is compact, after removing the comments, that is. :-)
    + There are no tables with arbitrary constants.
    + For clarity, the comments link the operations to the specifications using
        the same notation as much as possible.
    + There is no restriction in cryptographic features. In particular,
        the SHAKE128 and SHAKE256 XOFs can produce any output length.
    + The code does not use much RAM, as all operations are done in place.

The drawbacks of this implementation are:
    - There is no message queue. The whole message must be ready in a buffer.
    - It is not optimized for performance.

The implementation is even simpler on a little endian platform. Just define the
LITTLE_ENDIAN symbol in that case.

For a more complete set of implementations, please refer to
the Keccak Code Package at https://github.com/gvanas/KeccakCodePackage

For more information, please refer to:
    * [Keccak Reference] http://keccak.noekeon.org/Keccak-reference-3.0.pdf
    * [Keccak Specifications Summary] http://keccak.noekeon.org/specs_summary.html

This file uses UTF-8 encoding, as some comments use Greek letters.
================================================================
*/

/**
  * Function to compute the Keccak[r, c] sponge function over a given input.
  * @param  rate            The value of the rate r.
  * @param  capacity        The value of the capacity c.
  * @param  input           Pointer to the input message.
  * @param  inputByteLen    The number of input bytes provided in the input message.
  * @param  delimitedSuffix Bits that will be automatically appended to the end
  *                         of the input message, as in domain separation.
  *                         This is a byte containing from 0 to 7 bits
  *                         These <i>n</i> bits must be in the least significant bit positions
  *                         and must be delimited with a bit 1 at position <i>n</i>
  *                         (counting from 0=LSB to 7=MSB) and followed by bits 0
  *                         from position <i>n</i>+1 to position 7.
  *                         Some examples:
  *                             - If no bits are to be appended, then @a delimitedSuffix must be 0x01.
  *                             - If the 2-bit sequence 0,1 is to be appended (as for SHA3-*), @a delimitedSuffix must be 0x06.
  *                             - If the 4-bit sequence 1,1,1,1 is to be appended (as for SHAKE*), @a delimitedSuffix must be 0x1F.
  *                             - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a delimitedSuffix must be 0x8B.
  * @param  output          Pointer to the buffer where to store the output.
  * @param  outputByteLen   The number of output bytes desired.
  * @pre    One must have r+c=1600 and the rate a multiple of 8 bits in this implementation.
  */

/**
  *  Function to compute SHAKE128 on the input message with any output length.
  */
void
FIPS202_SHAKE128(const uint8_t *input, size_t inputByteLen, uint8_t *output, size_t outputByteLen)
{
    keccak k (1344, 256, 0x1f);
    k.update (input, inputByteLen);
    k.digest (output, outputByteLen);
}

/**
  *  Function to compute SHAKE256 on the input message with any output length.
  */
void FIPS202_SHAKE256(const uint8_t *input, size_t inputByteLen, uint8_t *output, size_t outputByteLen)
{
    keccak k (1088, 512, 0x1f);
    k.update (input, inputByteLen);
    k.digest (output, outputByteLen);
}

/**
  *  Function to compute SHA3-224 on the input message. The output length is
  *  fixed to 28 bytes.
  *
  *  @param input         Pointer to the message bytes.
  *  @param inputByteLen  Number of message bytes.
  *  @param output        Destination buffer; receives 28 bytes.
  */
void FIPS202_SHA3_224(const uint8_t *input, size_t inputByteLen, uint8_t *output)
{
    // Sponge parameters: 1152-bit rate, 448-bit capacity, suffix byte 0x06.
    keccak k (1152, 448, 0x06);

    // Absorb the message in a single call, like the other SHA3-* wrappers.
    // The previous byte-at-a-time loop used an `unsigned int` counter against
    // a size_t length, which never terminates once the length exceeds the
    // range of unsigned int.
    k.update (input, inputByteLen);
    k.digest (output, 28);
}

/**
  *  Function to compute SHA3-256 on the input message. The output length is fixed to 32 bytes.
  */
void FIPS202_SHA3_256(const uint8_t *input, size_t inputByteLen, uint8_t *output)
{
    keccak k (1088, 512, 0x06);
    k.update (input, inputByteLen);
    k.digest (output, 32);
}

/**
  *  Function to compute SHA3-384 on the input message. The output length is fixed to 48 bytes.
  */
void FIPS202_SHA3_384(const uint8_t *input, size_t inputByteLen, uint8_t *output)
{
    keccak k (832, 768, 0x06);
    k.update (input, inputByteLen);
    k.digest (output, 48);
}

/**
  *  Function to compute SHA3-512 on the input message. The output length is fixed to 64 bytes.
  */
void FIPS202_SHA3_512(const uint8_t *input, size_t inputByteLen, uint8_t *output)
{
    keccak k (576, 1024, 0x06);
    k.update (input, inputByteLen);
    k.digest (output, 64);
}


/*
================================================================
A readable and compact implementation of the Keccak-f[1600] permutation.
================================================================
*/

//static constexpr
//size_t
//i (size_t x, size_t y)
//{
//    return x + 5 * y;
//}


/**
  * 8-bit linear feedback shift register that produces the bit stream from
  * which the round constants are built (see [Keccak Reference, Section 1.2]).
  *
  * The register starts at 0x01. Each call to update() yields the current low
  * bit and then advances the register by one step.
  */
class lfsr86540 {
public:
    lfsr86540 ():
        m_state (0x01)
    { ; }


    /// Returns the low bit of the register as it was before this call, then
    /// steps the register.
    bool
    update (void)
    {
        const bool emitted  = (m_state & 0x01) != 0;
        const bool feedback = (m_state & 0x80) != 0;

        // The shift drops the top bit; when that bit was set, fold it back in
        // using the primitive polynomial over GF(2): x^8+x^6+x^5+x^4+1
        m_state = static_cast<uint8_t> (m_state << 1);
        if (feedback)
            m_state ^= 0x71;

        return emitted;
    }


private:
    uint8_t m_state;
};


// θ step, see [Keccak Reference, Section 2.3.2]
//
// Every lane in column x is XORed with the parity of column x-1 combined with
// the parity of column x+1 rotated left by one bit (column indices mod 5).
// The state is indexed as m_words[y][x].
static void
permute_theta (uint64_t m_words[5][5])
{
    // Column parities: XOR of the five lanes that share an x coordinate.
    uint64_t parity[5];
    for (unsigned col = 0; col < 5; ++col) {
        uint64_t acc = 0;
        for (unsigned row = 0; row < 5; ++row)
            acc ^= m_words[row][col];
        parity[col] = acc;
    }

    // All parities are computed before any lane is modified, so each column's
    // effect is derived from the pre-θ state.
    for (unsigned col = 0; col < 5; ++col) {
        const uint64_t effect = parity[(col + 4) % 5]
                              ^ util::rotatel (parity[(col + 1) % 5], 1);

        for (unsigned row = 0; row < 5; ++row)
            m_words[row][col] ^= effect;
    }
}


// ρ step (see [Keccak Reference, Sections 2.3.3 and 2.3.4]): rotates every
// lane left, in place, by a fixed per-lane offset.
//
// The state is indexed as m_words[y][x], and OFFSETS uses the same layout.
// Lane (0,0) has offset 0 and is left untouched.
void
permute_rho (uint64_t m_words[5][5])
{
    static constexpr unsigned OFFSETS[5][5] = {
        {  0,  1, 62, 28, 27 },
        { 36, 44,  6, 55, 20 },
        {  3, 10, 43, 25, 39 },
        { 41, 45, 15, 21,  8 },
        { 18,  2, 61, 56, 14 },
    };

    for (unsigned y = 0; y < 5; ++y) {
        for (unsigned x = 0; x < 5; ++x) {
            // Skip the zero offset explicitly rather than rely on how the
            // rotate helper treats a rotation by 0.
            if (OFFSETS[y][x] == 0)
                continue;

            m_words[y][x] = util::rotatel (m_words[y][x], OFFSETS[y][x]);
        }
    }
}


// π step: moves lanes between positions without changing their contents.
//
// The state is indexed as m_words[y][x]. Starting from lane (x=1, y=0), the
// walk (x, y) -> (y, (2x + 3y) % 5) is followed for 24 steps. At every step
// the lane carried from the previous position is dropped into the new
// position and the lane that was there is picked up. Lane (0,0) is never
// visited.
void
permute_pi (uint64_t m_words[5][5])
{
    unsigned col = 1;
    unsigned row = 0;
    uint64_t carried = m_words[row][col];

    // Iterate over ((0 1)(2 3))^t * (1 0) for 0 ≤ t ≤ 23
    for (unsigned remaining = 24; remaining != 0; --remaining) {
        const unsigned next_row = (2 * col + 3 * row) % 5;
        col = row;
        row = next_row;

        uint64_t &slot = m_words[row][col];
        const uint64_t displaced = slot;
        slot    = carried;
        carried = displaced;
    }
}


/**
 * Applies the 24-round Keccak-f[1600] permutation to the sponge state.
 *
 * Each round runs θ, ρ and π through the helper functions, then χ and ι
 * inline. The ι round constants are generated bit by bit from an lfsr86540
 * that is created once per call and carried across all rounds.
 *
 * update() and digest() access the state through m_bytes, while the rounds
 * work on 64-bit lanes in m_words.
 * NOTE(review): this assumes m_bytes and m_words alias the same 200 bytes
 * (declared in keccak.hpp) and that util::ltoh is a byte swap on big-endian
 * hosts and the identity on little-endian hosts -- confirm both.
 */
void
keccak::permute (void)
{
    // Bring every lane (rate and capacity alike) from little-endian byte
    // order into host order before doing arithmetic on it.
    for (unsigned y = 0; y < 5; ++y)
        for (unsigned x = 0; x < 5; ++x)
            m_words[y][x] = util::ltoh (m_words[y][x]);

    lfsr86540 shift;

    for (unsigned round = 0; round < 24; ++round) {
        // === θ, ρ and π steps ===
        permute_theta (m_words);
        permute_rho (m_words);
        permute_pi (m_words);

        {   // === χ step (see [Keccak Reference, Section 2.3.1]) ===
            uint64_t temp[5];
            for (unsigned y = 0; y < 5; ++y) {
                // Take a copy of the plane
                for (unsigned x = 0; x < 5; ++x)
                    temp[x] = m_words[y][x];

                // Compute χ on the plane
                for (unsigned x = 0; x < 5; ++x)
                    m_words[y][x] = temp[x] ^ ((~temp[(x+1)%5]) & temp[(x+2)%5]);
            }
        }

        {   // === ι step (see [Keccak Reference, Section 2.3.5]) ===
            // Seven LFSR outputs per round; output j controls bit 2^j-1 of
            // lane (0,0).
            for (unsigned j = 0; j < 7; ++j) {
                unsigned int bitPosition = (1 << j) - 1; //2^j-1
                if (shift.update ())
                    m_words[0][0] ^= uint64_t{1} << bitPosition;
            }
        }
    }

    // Convert back so the byte view seen by update()/digest() is again in
    // little-endian lane order. The same conversion is its own inverse.
    for (unsigned y = 0; y < 5; ++y)
        for (unsigned x = 0; x < 5; ++x)
            m_words[y][x] = util::ltoh (m_words[y][x]);
}

/*
================================================================
A readable and compact implementation of the Keccak sponge functions
that use the Keccak-f[1600] permutation.
================================================================
*/


// Absorbs `len` bytes from `input` into the sponge state.
//
// Bytes are XORed into m_bytes starting at m_cursor. Whenever the cursor
// reaches the rate (in bytes) the state is permuted and the cursor returns to
// the start of the block. A partially filled block is left pending for the
// next update() or for digest().
void
keccak::update (
    const uint8_t *input,
    size_t len
) {
    unsigned int byterate = m_bitrate / 8;

    for (size_t remaining = len; remaining != 0; ) {
        // Never absorb past the end of the current rate block.
        auto take = util::min (remaining, byterate - m_cursor);
        remaining -= take;

        for (; take != 0; --take) {
            m_bytes[m_cursor] ^= *input;
            ++m_cursor;
            ++input;
        }

        if (m_cursor == byterate) {
            permute ();
            m_cursor = 0;
        }
    }
}


// Pads the absorbed message and writes `len` output bytes to `output`.
//
// The state is modified in the process and m_cursor is not reset.
// NOTE(review): this looks intended to be called once per instance, after all
// update() calls -- confirm against callers.
void
keccak::digest (
    uint8_t *output,
    size_t len
) {
    unsigned byterate = m_bitrate / 8u;
    const unsigned last = byterate - 1;

    // === Padding ===
    // The suffix byte carries the trailing message bits together with the
    // first padding bit; it goes right after the last absorbed byte.
    m_bytes[m_cursor] ^= m_suffix;

    // When the suffix occupies the top bit of the final byte of the block
    // there is no room left for the second padding bit, so start a new block.
    const bool suffix_uses_top_bit = (m_suffix & 0x80) != 0;
    if (suffix_uses_top_bit && m_cursor == last)
        permute ();

    // Second (final) padding bit
    m_bytes[last] ^= 0x80;

    // === Squeezing ===
    // Permute before each output block and hand out at most one rate's worth
    // of bytes per block.
    while (len != 0) {
        permute ();

        auto chunk = util::min (len, byterate);
        output = std::copy_n (m_bytes.begin (), chunk, output);
        len -= chunk;
    }
}


// Constructs a sponge with the given rate and capacity (both in bits) and the
// suffix byte that digest() XORs in after the last absorbed byte.
//
// Throws the string literal "error" when the parameters are unusable:
//   - the rate must be non-zero; a zero rate would make digest() index
//     m_bytes[byterate - 1] with a wrapped-around index and would keep
//     update() from ever consuming input,
//   - the rate must be a multiple of 64 bits,
//   - rate + capacity must equal the size of the state in bits exactly.
keccak::keccak (unsigned _bitrate,
                unsigned _capacity,
                uint8_t  _suffix):
    m_bitrate (_bitrate),
    m_capacity (_capacity),
    m_suffix (_suffix),
    m_cursor (0)
{
    // we could support bitrates that are multiples of 8, but 64 simplifies
    // some state handling, and the SHA-3 constants are all multiples of 64
    // bits anyway.
    const auto state_bits = 8 * sizeof (m_bytes);

    // Written as a subtraction so that rate + capacity cannot wrap around.
    const bool fills_state = m_bitrate <= state_bits
                          && m_capacity == state_bits - m_bitrate;

    if (m_bitrate == 0 || m_bitrate % 64 != 0 || !fills_state)
        throw "error";

    std::fill (std::begin (m_bytes), std::end (m_bytes), 0);
}


#endif