// libcruft-util/cruft/util/hash/buzhash.hpp

/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* Copyright 2019 Danny Robson <danny@nerdcruft.net>
*/
#pragma once
#include "table.hpp"
#include "../bitwise.hpp"
#include "../std.hpp"
#include "../view.hpp"
#include <cstddef>
///////////////////////////////////////////////////////////////////////////////
namespace cruft::hash {
/// Implements a rolling hash by using a cyclic polynomial, aka buzhash.
/// see: https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial
///
/// The hash covers a fixed window of `width` words. The window is primed
/// from the first `width` words supplied to the constructor; each
/// subsequent call to `operator()` mixes one new word in and removes the
/// word that was seen `width` words earlier.
///
/// \tparam StateT  The hash state size, and the result type.
/// \tparam WordT   The data type for each term of the polynomial
/// \tparam HashT   The object used to hash the words prior to mixing.
///     This needs to be very fast for most clients of the algorithm;
///     although functionally it could be any functor that maps WordT
///     onto StateT.
template <
    typename StateT,
    typename WordT = u08,
    typename HashT = table<StateT>
>
class buzhash {
public:
    // The state must be at least as large as the words mixed into it.
    static_assert (sizeof (StateT) >= sizeof (WordT));

    using digest_type = StateT;


    /// Construct the hash and prime it with the first `_width` words of
    /// `_init`.
    ///
    /// \param _width  The number of words covered by the rolling window.
    ///     Checked (via CHECK_NEZ/CHECK_LT) to be non-zero and less than
    ///     the number of bits in StateT.
    /// \param _init   The data used to prime the window. Only the first
    ///     `_width` words are consumed.
    /// \param args    Forwarded verbatim to the constructor of HashT.
    ///
    /// \throws std::out_of_range if `_init` holds fewer than `_width`
    ///     words.
    template <typename ...Args>
    buzhash (
        std::size_t _width,
        cruft::view<WordT const*> _init,
        Args &&...args
    )
        : m_width (_width)
        , m_hash  (std::forward<Args> (args)...)
    {
        // Zero width would make for a constant zero hash.
        CHECK_NEZ (_width);

        // Rotations greater than data type size are often undefined.
        CHECK_LT (_width, sizeof (StateT) * 8);

        if (_init.size () < m_width)
            throw std::out_of_range ("buzhash input too small");

        // Prime the initial window. The i'th word (counting from one) is
        // rotated by `width - i`, so the oldest word receives the largest
        // rotation and the newest word is rotated by zero.
        //
        // NOTE(review): `rotatel` is not visible here; confirm that it is
        // well defined for a rotation of zero.
        auto cursor = _init.begin ();
        for (std::size_t i = 1; i <= m_width; ++i, ++cursor) {
            m_state ^= rotatel (m_hash (*cursor), m_width - i);
        }
    }


    buzhash (buzhash const&) = default;
    buzhash (buzhash &&) noexcept = default;

    buzhash& operator= (buzhash const&) = default;
    buzhash& operator= (buzhash &&) noexcept = default;


    /// Rotate the hash over a pointer to the buffer we've been operating
    /// on.
    ///
    /// The previous `width` bytes _must_ be dereferencable and identical
    /// to the previously observed values.
    ///
    /// \param cursor  Points at the word entering the window. The word
    ///     leaving the window is read from `cursor - width`.
    /// \return The updated hash state.
    StateT operator() (WordT const *cursor)
    {
        return (*this) (cursor[0], *(cursor - m_width));
    }


    /// Rotate a word into the hash, and the corresponding word out of the
    /// hash.
    ///
    /// The `removal` value _must_ be the same as the value seen `width`
    /// values previously.
    ///
    /// \param addition  The word entering the window.
    /// \param removal   The word leaving the window.
    /// \return The updated hash state.
    StateT operator() (WordT const addition, WordT const removal)
    {
        // Shift the polynomial
        m_state = rotatel (m_state, 1)
            // Remove the data that's about to leave our window. After the
            // shift above its contribution has been rotated `width` times
            // in total, so the same rotation cancels it under xor.
            ^ rotatel (m_hash (removal), m_width)
            // Mix in the new data
            ^ m_hash (addition);

        return m_state;
    }


    /// An observer for the hash state/value.
    ///
    /// Provided for symmetry with other hash objects.
    constexpr StateT digest (void) const noexcept { return m_state; }

    /// Implicit conversion to the current hash value; equivalent to
    /// calling `digest`.
    operator StateT () const { return digest (); }


private:
    /// The number of words covered by the rolling window.
    std::size_t m_width;

    /// The functor applied to each word before it is mixed into the state.
    HashT m_hash;

    /// The current hash value.
    StateT m_state = 0;
};
}