hash/buzhash: add a trivial buzhash implementation
This commit is contained in:
parent
81715c1602
commit
209a6800c4
@ -328,6 +328,7 @@ list (
|
|||||||
hash/fwd.hpp
|
hash/fwd.hpp
|
||||||
hash/adler.cpp
|
hash/adler.cpp
|
||||||
hash/adler.hpp
|
hash/adler.hpp
|
||||||
|
hash/buzhash.hpp
|
||||||
hash/bsdsum.cpp
|
hash/bsdsum.cpp
|
||||||
hash/bsdsum.hpp
|
hash/bsdsum.hpp
|
||||||
hash/crc.cpp
|
hash/crc.cpp
|
||||||
@ -595,6 +596,7 @@ if (TESTS)
|
|||||||
geom/ray
|
geom/ray
|
||||||
geom/segment
|
geom/segment
|
||||||
geom/sphere
|
geom/sphere
|
||||||
|
hash/buzhash
|
||||||
hash/checksum
|
hash/checksum
|
||||||
hash/crc
|
hash/crc
|
||||||
hash/fasthash
|
hash/fasthash
|
||||||
|
109
hash/buzhash.hpp
Normal file
109
hash/buzhash.hpp
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
/*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* Copyright 2019 Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "table.hpp"
|
||||||
|
#include "../bitwise.hpp"
|
||||||
|
#include "../std.hpp"
|
||||||
|
#include "../view.hpp"
|
||||||
|
|
||||||
|
#include <cstddef>
#include <stdexcept>
#include <utility>
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
namespace cruft::hash {
|
||||||
|
/// Implements a rolling hash by using a cyclic polynomial, aka buzhash.
|
||||||
|
/// see: https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial
|
||||||
|
///
|
||||||
|
/// \tparam StateT The hash state size, and the result type.
|
||||||
|
/// \tparam WordT The data type for each term of the polynomial
|
||||||
|
/// \tparam HashT The object used to hash the words prior to mixing.
|
||||||
|
/// This needs to be very fast for most clients of the algorithm;
|
||||||
|
/// although functionally it could be any functor that maps WordT
|
||||||
|
/// onto StateT.
|
||||||
|
template <
|
||||||
|
typename StateT,
|
||||||
|
typename WordT = u08,
|
||||||
|
typename HashT = table<StateT>
|
||||||
|
>
|
||||||
|
class buzhash {
|
||||||
|
public:
|
||||||
|
static_assert (sizeof (StateT) >= sizeof (WordT));
|
||||||
|
|
||||||
|
template <typename ...Args>
|
||||||
|
buzhash (
|
||||||
|
std::size_t _width,
|
||||||
|
cruft::view<WordT const*> _init,
|
||||||
|
Args &&...args
|
||||||
|
)
|
||||||
|
: m_width (_width)
|
||||||
|
, m_hash (std::forward<Args> (args)...)
|
||||||
|
{
|
||||||
|
// Zero width would make for a constant zero hash.
|
||||||
|
CHECK_NEZ (_width);
|
||||||
|
// Rotations greater than data type size is often undefined.
|
||||||
|
CHECK_LT (_width, sizeof (WordT ) * 8);
|
||||||
|
CHECK_LT (_width, sizeof (StateT) * 8);
|
||||||
|
|
||||||
|
if (_init.size () < m_width)
|
||||||
|
throw std::out_of_range ("buzhash input too small");
|
||||||
|
|
||||||
|
// Prime the initial window
|
||||||
|
auto cursor = _init.begin ();
|
||||||
|
for (std::size_t i = 1; i <= m_width; i++, cursor++) {
|
||||||
|
m_state ^= rotatel (m_hash (*cursor), m_width - i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
buzhash (buzhash const&) = default;
|
||||||
|
buzhash (buzhash &&) noexcept = default;
|
||||||
|
buzhash& operator= (buzhash const&) = default;
|
||||||
|
buzhash& operator= (buzhash &&) = default;
|
||||||
|
|
||||||
|
/// Rotate the hash over a pointer to the buffer we've been operating
|
||||||
|
/// on.
|
||||||
|
///
|
||||||
|
/// The previous `width` bytes _must_ be dereferencable and identical
|
||||||
|
/// to the previously observed values.
|
||||||
|
StateT operator() (WordT const *cursor)
|
||||||
|
{
|
||||||
|
return (*this) (cursor[0], *(cursor - m_width));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Rotate a word into the hash, and the corresponding word out of the
|
||||||
|
/// hash.
|
||||||
|
///
|
||||||
|
/// The `removal` value _must_ be the same as the value seen `width`
|
||||||
|
/// values previously.
|
||||||
|
StateT operator() (WordT const addition, WordT const removal)
|
||||||
|
{
|
||||||
|
// Shift the polynomial
|
||||||
|
m_state = rotatel (m_state, 1)
|
||||||
|
// Remove the data that's about to leave our window.
|
||||||
|
^ rotatel (m_hash (removal), m_width)
|
||||||
|
// Mix in the new data
|
||||||
|
^ m_hash (addition);
|
||||||
|
|
||||||
|
return m_state;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// An observer for the hash state/value.
|
||||||
|
///
|
||||||
|
/// Provided for symmetry with other hash objects.
|
||||||
|
constexpr StateT digest (void) const noexcept { return m_state; }
|
||||||
|
operator StateT () const { return digest (); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::size_t m_width;
|
||||||
|
HashT m_hash;
|
||||||
|
StateT m_state = 0;
|
||||||
|
};
|
||||||
|
}
|
38
test/hash/buzhash.cpp
Normal file
38
test/hash/buzhash.cpp
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
#include "tap.hpp"
|
||||||
|
#include "hash/buzhash.hpp"
|
||||||
|
#include "std.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
int main ()
|
||||||
|
{
|
||||||
|
cruft::TAP::logger tap;
|
||||||
|
|
||||||
|
// Use buzhash to find the string 'needle' inside a larger string.
|
||||||
|
static constexpr u08 needle[] = "needle";
|
||||||
|
static constexpr std::size_t WIDTH = std::size (needle) - 1;
|
||||||
|
|
||||||
|
// Compute the hash of the needle
|
||||||
|
auto const key = cruft::hash::buzhash<u16> (WIDTH, needle).digest ();
|
||||||
|
|
||||||
|
// Find the point at which the hash object equals the key's digest.
|
||||||
|
static constexpr u08 haystack[] = "there is a needle here somewhere";
|
||||||
|
cruft::hash::buzhash<u16> h (WIDTH, haystack);
|
||||||
|
auto pos = std::find_if (
|
||||||
|
std::begin (haystack) + WIDTH,
|
||||||
|
std::end (haystack),
|
||||||
|
[&h, key] (auto const &i) { return h (&i) == key; }
|
||||||
|
);
|
||||||
|
|
||||||
|
tap.expect_eq (
|
||||||
|
key,
|
||||||
|
h.digest (),
|
||||||
|
"needle/haystack digests match"
|
||||||
|
);
|
||||||
|
|
||||||
|
tap.expect (
|
||||||
|
pos == haystack + strlen ("there is a needle") - 1,
|
||||||
|
"buzhash finds the haystack"
|
||||||
|
);
|
||||||
|
|
||||||
|
return tap.status ();
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user