libcruft-util/hash/xxhash.cpp

209 lines
5.9 KiB
C++

/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 2016-2018 Danny Robson <danny@nerdcruft.net>
*/
#include "xxhash.hpp"
#include "../bitwise.hpp"
#include "../debug/assert.hpp"
#include "../endian.hpp"
#include <cstring>
using cruft::hash::xxhash;
///////////////////////////////////////////////////////////////////////////////
// Read an unsigned integer of type T stored in little-endian byte order at
// `ptr`.
//
// The value is assembled one byte at a time rather than by dereferencing a
// casted pointer. This keeps the read well-defined when `ptr` is not
// suitably aligned for T (the hash walks an arbitrary byte range), avoids
// aliasing the byte buffer through an unrelated type, and yields the same
// result regardless of host byte order.
//
// ptr: address of at least sizeof (T) readable bytes.
// returns: the decoded value.
template <typename T>
static
T
read_le (const void *ptr)
{
    const auto *bytes = static_cast<const unsigned char*> (ptr);

    T value = 0;
    for (std::size_t i = 0; i < sizeof (T); ++i) {
        // byte `i` carries bits [8i, 8i+8) of the result
        value = static_cast<T> (value | (static_cast<T> (bytes[i]) << (8 * i)));
    }

    return value;
}
///////////////////////////////////////////////////////////////////////////////
// Per-word-size tuning constants for the hash.
//
// Only the 32-bit and 64-bit specialisations below are defined. Each one
// provides:
//   prime        - five multiplicative constants, indexed 0..4
//   final_rotate - three right-shift distances used by the final mixing step
//   round_rotate - the left-rotate distance applied inside `round`
template <typename T>
struct constants;


//-----------------------------------------------------------------------------
template <>
struct constants<uint32_t> {
    static constexpr uint32_t prime[5] = {
        2654435761u,
        2246822519u,
        3266489917u,
        668265263u,
        374761393u,
    };

    static constexpr uint32_t final_rotate[3] = { 15, 13, 16 };
    static constexpr uint32_t round_rotate = 13;
};


//-----------------------------------------------------------------------------
template <>
struct constants<uint64_t> {
    static constexpr uint64_t prime[5] = {
        11400714785074694791ull,
        14029467366897019727ull,
        1609587929392839161ull,
        9650029242287828579ull,
        2870177450012600261ull,
    };

    static constexpr uint64_t final_rotate[3] = { 33, 29, 32 };
    static constexpr uint64_t round_rotate = 31;
};
///////////////////////////////////////////////////////////////////////////////
template <typename T>
static
T
round (T state, T input)
{
state += input * constants<T>::prime[1];
state = cruft::rotatel (state, constants<T>::round_rotate);
state *= constants<T>::prime[0];
return state;
}
///////////////////////////////////////////////////////////////////////////////
// Construct a hasher that mixes `_seed` into the initial state of every
// digest it computes.
template <typename WordT>
xxhash<WordT>::xxhash (WordT _seed)
    : m_seed (_seed)
{}
///////////////////////////////////////////////////////////////////////////////
// Compute the digest of `data` in one pass, using the seed supplied at
// construction.
//
// The input is handled in three stages:
//   1. whole blocks are folded into four independent accumulator lanes;
//   2. the lanes are compressed into a single word and the input length is
//      added;
//   3. the bytes left over after the last whole block are mixed in by word,
//      (for 64-bit hashes) by half word, and finally byte by byte, followed
//      by a final scramble of the result.
//
// data: the bytes to hash; may be empty.
// returns: the digest word.
template <typename WordT>
typename xxhash<WordT>::digest_t
xxhash<WordT>::operator() (const cruft::view<const uint8_t*> data)
{
    using params = constants<WordT>;

    word_t state[4] {
        m_seed + params::prime[0] + params::prime[1],
        m_seed + params::prime[1],
        m_seed,
        m_seed - params::prime[0],
    };

    auto cursor = std::cbegin (data);
    const auto last = std::cend (data);

    // consume block sized chunks while they're available, feeding one word
    // to each lane per block. each pass reads four words, so block_bytes
    // (declared in the header) is expected to be 4 * sizeof (word_t).
    //
    // the comparison must be inclusive: when exactly one block remains it
    // has to be consumed here. leaving it for the word-wise tail below
    // produces a different digest for inputs whose length is a multiple of
    // the block size.
    while (last - cursor >= block_bytes) {
        for (auto &lane: state) {
            lane = round<word_t> (lane, read_le<word_t> (cursor));
            cursor += sizeof (word_t);
        }
    }

    // compress the state and mix in the data size. inputs shorter than one
    // block never touched the lanes, so only the seed contributes.
    word_t h;
    if (data.size () < block_bytes) {
        h = state[2] + params::prime[4];
    } else {
        h = rotatel (state[0],  1) +
            rotatel (state[1],  7) +
            rotatel (state[2], 12) +
            rotatel (state[3], 18);

        // the 64-bit variant additionally merges each lane back in
        if constexpr (std::is_same_v<WordT,uint64_t>) {
            for (const auto lane: state)
                h = (h ^ round<WordT> (0, lane)) * params::prime[0] + params::prime[3];
        }
    }

    h += static_cast<WordT> (data.size ());

    // count the unread bytes instead of comparing `cursor + n` against
    // `last`; the latter may form a pointer past the end of the buffer.
    const auto remaining = [&] {
        return static_cast<std::size_t> (last - cursor);
    };

    // drain the remainder of the data, first by words...
    while (remaining () >= sizeof (WordT)) {
        if constexpr (std::is_same_v<WordT,uint32_t>) {
            h += read_le<WordT> (cursor) * params::prime[2];
            h  = rotatel (h, 17) * params::prime[3];
        } else {
            h = rotatel (
                h ^ round<WordT> (0, read_le<WordT> (cursor)), 27
            ) * params::prime[0] + params::prime[3];
        }

        cursor += sizeof (WordT);
    }

    // ...then maybe by half words. fewer than sizeof (WordT) bytes remain at
    // this point, so this runs at most once.
    if constexpr (std::is_same_v<WordT,uint64_t>) {
        if (remaining () >= sizeof (uint32_t)) {
            h = rotatel (
                h ^ read_le<uint32_t> (cursor) * params::prime[0], 23
            ) * params::prime[1] + params::prime[2];

            cursor += sizeof (uint32_t);
        }
    }

    // ...then by bytes
    while (cursor != last) {
        if constexpr (std::is_same_v<WordT,uint32_t>) {
            h += *cursor * params::prime[4];
            h  = rotatel (h, 11) * params::prime[0];
        } else {
            h = rotatel (h ^ *cursor * params::prime[4], 11) * params::prime[0];
        }

        ++cursor;
    }

    // everything should have been consumed by now
    CHECK_EQ (cursor, std::cend (data));

    // mix the result one last time before returning
    h ^= h >> params::final_rotate[0]; h *= params::prime[1];
    h ^= h >> params::final_rotate[1]; h *= params::prime[2];
    h ^= h >> params::final_rotate[2];

    return h;
}
///////////////////////////////////////////////////////////////////////////////
// Explicitly instantiate the hasher for 32-bit and 64-bit words so that the
// member definitions in this translation unit are emitted for both.
template class cruft::hash::xxhash<uint32_t>;
template class cruft::hash::xxhash<uint64_t>;