From a61ea3222883289dc8946a36bb1fa978691fea54 Mon Sep 17 00:00:00 2001 From: Danny Robson Date: Wed, 20 May 2015 14:47:36 +1000 Subject: [PATCH] murmurhash: add initial translation --- Makefile.am | 10 ++ hash/murmur.cpp | 31 +++++ hash/murmur.hpp | 24 ++++ hash/murmur/common.cpp | 134 +++++++++++++++++++++ hash/murmur/common.hpp | 31 +++++ hash/murmur/murmur1.cpp | 63 ++++++++++ hash/murmur/murmur1.hpp | 29 +++++ hash/murmur/murmur2.cpp | 144 ++++++++++++++++++++++ hash/murmur/murmur2.hpp | 35 ++++++ hash/murmur/murmur3.cpp | 258 ++++++++++++++++++++++++++++++++++++++++ hash/murmur/murmur3.hpp | 35 ++++++ test/hash/murmur.cpp | 130 ++++++++++++++++++++ 12 files changed, 924 insertions(+) create mode 100644 hash/murmur.cpp create mode 100644 hash/murmur.hpp create mode 100644 hash/murmur/common.cpp create mode 100644 hash/murmur/common.hpp create mode 100644 hash/murmur/murmur1.cpp create mode 100644 hash/murmur/murmur1.hpp create mode 100644 hash/murmur/murmur2.cpp create mode 100644 hash/murmur/murmur2.hpp create mode 100644 hash/murmur/murmur3.cpp create mode 100644 hash/murmur/murmur3.hpp create mode 100644 test/hash/murmur.cpp diff --git a/Makefile.am b/Makefile.am index cedc0f65..c3b30b3a 100644 --- a/Makefile.am +++ b/Makefile.am @@ -71,6 +71,15 @@ UTIL_FILES = \ hash/md4.hpp \ hash/md5.cpp \ hash/md5.hpp \ + hash/murmur.hpp \ + hash/murmur/common.cpp \ + hash/murmur/common.hpp \ + hash/murmur/murmur1.cpp \ + hash/murmur/murmur1.hpp \ + hash/murmur/murmur2.cpp \ + hash/murmur/murmur2.hpp \ + hash/murmur/murmur3.cpp \ + hash/murmur/murmur3.hpp \ hash/pbkdf1.cpp \ hash/pbkdf1.hpp \ hash/pbkdf2.cpp \ @@ -277,6 +286,7 @@ TEST_BIN = \ test/extent \ test/fixed \ test/float \ + test/hash/murmur \ test/hmac \ test/hotp \ test/hton \ diff --git a/hash/murmur.cpp b/hash/murmur.cpp new file mode 100644 index 00000000..2f5fabae --- /dev/null +++ b/hash/murmur.cpp @@ -0,0 +1,31 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use 
this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2015 Danny Robson
+ */
+
+
+#include "murmur.hpp"
+
+#include "bitwise.hpp"
+
+#include <cstddef>
+
+
+struct seq {                                    // index span: begin() is 0, end() is count
+    seq (size_t _count): count (_count) { ; }
+    struct iter { size_t i; };                  // bare position; defines no operators
+    iter begin (void) const { return iter {0}; };
+    iter end (void) const { return iter {count}; };
+    size_t count;
+};
diff --git a/hash/murmur.hpp b/hash/murmur.hpp
new file mode 100644
index 00000000..6d774264
--- /dev/null
+++ b/hash/murmur.hpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2015 Danny Robson
+ */
+
+#ifndef __UTIL_HASH_MURMUR_HPP
+#define __UTIL_HASH_MURMUR_HPP
+
+#include "./murmur/murmur1.hpp"
+#include "./murmur/murmur2.hpp"
+#include "./murmur/murmur3.hpp"
+
+#endif
diff --git a/hash/murmur/common.cpp b/hash/murmur/common.cpp
new file mode 100644
index 00000000..f455b15b
--- /dev/null
+++ b/hash/murmur/common.cpp
@@ -0,0 +1,142 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2015 Danny Robson
+ */
+
+#include "common.hpp"
+
+#include "../../debug.hpp"
+
+///////////////////////////////////////////////////////////////////////////////
+// Packs the `len % 4` bytes starting at `data` into one word; the byte at the
+// lowest address lands in the least significant position.
+uint32_t
+util::hash::murmur::tail (const uint32_t *restrict data, size_t len)
+{
+    auto bytes = reinterpret_cast<const uint8_t*> (data);
+    uint32_t h = 0;
+
+    switch (len % sizeof (uint32_t)) {          // cases fall through on purpose
+    case 3: h += bytes[2] << 16;
+    case 2: h += bytes[1] <<  8;
+    case 1: h += bytes[0];
+        break;
+
+    default:                                    // whole number of words: no tail
+        unreachable ();
+    }
+
+    return h;
+}
+
+
+//-----------------------------------------------------------------------------
+// Packs the `len % 8` bytes starting at `data` into one word; the byte at the
+// lowest address lands in the least significant position.
+uint64_t
+util::hash::murmur::tail (const uint64_t *restrict data, size_t len)
+{
+    auto bytes = reinterpret_cast<const uint8_t*> (data);
+    uint64_t h = 0;
+
+    switch (len % sizeof (uint64_t)) {          // cases fall through on purpose
+    case 7: h += uint64_t(bytes[6]) << 48;
+    case 6: h += uint64_t(bytes[5]) << 40;
+    case 5: h += uint64_t(bytes[4]) << 32;
+    case 4: h += uint64_t(bytes[3]) << 24;
+    case 3: h += uint64_t(bytes[2]) << 16;
+    case 2: h += uint64_t(bytes[1]) <<  8;
+    case 1: h += uint64_t(bytes[0]);
+        break;
+
+    default:                                    // whole number of words: no tail
+        unreachable ();
+    }
+
+    return h;
+}
+
+
+//-----------------------------------------------------------------------------
+// Packs the `len % 16` bytes starting at `data` into four 32 bit words, lowest
+// address first within each word. Words the tail does not reach stay zero.
+std::array<uint32_t,4>
+util::hash::murmur::tail_array (const uint32_t *restrict data, size_t len)
+{
+    auto bytes = reinterpret_cast<const uint8_t*> (data);
+    std::array<uint32_t,4> result {0,0,0,0};
+
+    // bytes are widened before shifting so that `<< 24` never shifts into the
+    // sign bit of a promoted int. cases fall through on purpose.
+    switch (len % 16) {
+    case 15: result[3] |= uint32_t (bytes[14]) << 16;
+    case 14: result[3] |= uint32_t (bytes[13]) <<  8;
+    case 13: result[3] |= uint32_t (bytes[12]) <<  0;
+    case 12: result[2] |= uint32_t (bytes[11]) << 24;
+    case 11: result[2] |= uint32_t (bytes[10]) << 16;
+    case 10: result[2] |= uint32_t (bytes[ 9]) <<  8;
+    case  9: result[2] |= uint32_t (bytes[ 8]) <<  0;
+    case  8: result[1] |= uint32_t (bytes[ 7]) << 24;
+    case  7: result[1] |= uint32_t (bytes[ 6]) << 16;
+    case  6: result[1] |= uint32_t (bytes[ 5]) <<  8;
+    case  5: result[1] |= uint32_t (bytes[ 4]) <<  0;
+    case  4: result[0] |= uint32_t (bytes[ 3]) << 24;
+    case  3: result[0] |= uint32_t (bytes[ 2]) << 16;
+    case  2: result[0] |= uint32_t (bytes[ 1]) <<  8;
+    case  1: result[0] |= uint32_t (bytes[ 0]) <<  0;
+        break;
+
+    default:                                    // whole number of blocks: no tail
+        unreachable ();
+    }
+
+    return result;
+}
+
+
+//-----------------------------------------------------------------------------
+std::array<uint64_t,2>
+util::hash::murmur::tail_array (const uint64_t *restrict data, size_t len)
+{
+    auto bytes = reinterpret_cast<const uint8_t*> (data);
+    std::array<uint64_t,2> result {0, 0};
+
+    switch(len & 15)                            // 64 bit twin of the overload above
+    {
+    case 15: result[1] |= ((uint64_t)bytes[14]) << 48;
+    case 14: result[1] |= ((uint64_t)bytes[13]) << 40;
+    case 13: result[1] |= ((uint64_t)bytes[12]) << 32;
+    case 12: result[1] |= ((uint64_t)bytes[11]) << 24;
+    case 11: result[1] |= ((uint64_t)bytes[10]) << 16;
+    case 10: result[1] |= ((uint64_t)bytes[ 9]) <<  8;
+    case  9: result[1] |= ((uint64_t)bytes[ 8]) <<  0;
+
+    case  8: result[0] |= ((uint64_t)bytes[ 7]) << 56;
+    case  7: result[0] |= ((uint64_t)bytes[ 6]) << 48;
+    case  6: result[0] |= ((uint64_t)bytes[ 5]) << 40;
+    case  5: result[0] |= ((uint64_t)bytes[ 4]) << 32;
+    case  4: result[0] |= ((uint64_t)bytes[ 3]) << 24;
+    case  3: result[0] |= ((uint64_t)bytes[ 2]) << 16;
+    case  2: result[0] |= ((uint64_t)bytes[ 1]) <<  8;
+    case  1: result[0] |= ((uint64_t)bytes[ 0]) <<  0;
+        break;
+
+    default:                                    // whole number of blocks: no tail
+        unreachable ();
+    };
+
+    return result;
+}
+
+
diff --git a/hash/murmur/common.hpp b/hash/murmur/common.hpp
new file mode 100644
index 00000000..5049e958
--- /dev/null
+++ b/hash/murmur/common.hpp
@@ -0,0 +1,36 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2015 Danny Robson
+ */
+
+#ifndef __UTIL_HASH_MURMUR_COMMON_HPP
+#define __UTIL_HASH_MURMUR_COMMON_HPP
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+
+///////////////////////////////////////////////////////////////////////////////
+namespace util { namespace hash { namespace murmur {
+    uint32_t tail (const uint32_t *restrict data, size_t len);
+    uint64_t tail (const uint64_t *restrict data, size_t len);
+
+    std::array<uint32_t,4>
+    tail_array (const uint32_t *restrict data, size_t len);
+
+    std::array<uint64_t,2>
+    tail_array (const uint64_t *restrict data, size_t len);
+} } }
+
+#endif
diff --git a/hash/murmur/murmur1.cpp b/hash/murmur/murmur1.cpp
new file mode 100644
index 00000000..6a888700
--- /dev/null
+++ b/hash/murmur/murmur1.cpp
@@ -0,0 +1,63 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2015 Danny Robson
+ */
+
+#include "murmur1.hpp"
+
+#include "common.hpp"
+#include "../../debug.hpp"
+
+//-----------------------------------------------------------------------------
+uint32_t
+util::hash::murmur1::mix (uint32_t h, uint32_t k)   // folds word k into state h
+{
+    static const uint32_t m = 0xc6a4a793;
+
+    h += k;
+    h *= m;
+    h ^= h >> 16;
+
+    return h;
+}
+
+
+//-----------------------------------------------------------------------------
+uint32_t
+util::hash::murmur1::hash_32 (const void *restrict data,
+                              size_t len,
+                              uint32_t seed)
+{
+    CHECK (data);
+
+    static const uint32_t m = 0xc6a4a793;
+    uint32_t h = seed ^ (len * m);              // product is truncated to 32 bits
+
+    // mix the body: every whole 32 bit word, read straight from the buffer
+    auto cursor = reinterpret_cast<const uint32_t*> (data);
+    auto last   = cursor + len / sizeof (uint32_t);
+    for (; cursor < last; ++cursor)
+        h = mix (h, *cursor);
+
+    // mix the tail: the 1-3 bytes left after the last whole word, if any
+    if (len % sizeof (uint32_t))
+        h = mix (h, murmur::tail (cursor, len));
+
+    // finalise
+    h *= m; h ^= h >> 10;
+    h *= m; h ^= h >> 17;
+
+    return h;
+}
+
diff --git a/hash/murmur/murmur1.hpp b/hash/murmur/murmur1.hpp
new file mode 100644
index 00000000..c28b7635
--- /dev/null
+++ b/hash/murmur/murmur1.hpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2015 Danny Robson
+ */
+
+#ifndef __UTIL_HASH_MURMUR_MURMUR1_HPP
+#define __UTIL_HASH_MURMUR_MURMUR1_HPP
+
+#include <cstddef>
+#include <cstdint>
+
+// Austin Appleby's MurmurHash1
+namespace util { namespace hash { namespace murmur1 {
+    uint32_t mix (uint32_t, uint32_t);
+    uint32_t hash_32 (const void *restrict data, size_t len, uint32_t seed);
+} } }
+
+#endif
diff --git a/hash/murmur/murmur2.cpp b/hash/murmur/murmur2.cpp
new file mode 100644
index 00000000..f052f6ec
--- /dev/null
+++ b/hash/murmur/murmur2.cpp
@@ -0,0 +1,144 @@
+
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2015 Danny Robson
+ */
+
+#include "murmur2.hpp"
+
+#include "../../debug.hpp"
+#include "common.hpp"
+
+
+///////////////////////////////////////////////////////////////////////////////
+template <typename T> struct constants { };    // per-width multiplier m, shift r
+
+template <>
+struct constants<uint32_t> {
+    static const uint32_t m = 0x5bd1e995;
+    static const  int32_t r = 24;
+};
+
+template <>
+struct constants<uint64_t> {
+    static const uint64_t m = 0xc6a4a7935bd1e995;
+    static const  int64_t r = 47;
+};
+
+
+/////////////////////////////////////////////////////////////////////
+uint32_t
+util::hash::murmur2::mix (uint32_t h, uint32_t k)   // folds word k into state h
+{
+    static const uint32_t m = constants<uint32_t>::m;
+    static const uint32_t r = constants<uint32_t>::r;
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    return h;
+}
+
+
+//-----------------------------------------------------------------------------
+// 64 bit murmur2 mixing function.
Note the last two lines are swapped
+// compared with 32 bit murmur2_mix. It's not clear if this is deliberate
+// in the canonical implementation, so we just leave it to help compatibility.
+uint64_t
+util::hash::murmur2::mix (uint64_t h, uint64_t k)
+{
+    static const uint64_t m = constants<uint64_t>::m;
+    static const uint64_t r = constants<uint64_t>::r;
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h ^= k;
+    h *= m;
+
+    return h;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+uint32_t
+util::hash::murmur2::hash_32 (const void *restrict key,
+                              size_t len,
+                              uint32_t seed)
+{
+    CHECK (key);
+
+    // setup
+    static const auto m = constants<uint32_t>::m;
+    uint32_t h = seed ^ len;
+
+    // body: every whole 32 bit word, read straight from the buffer
+    auto cursor = reinterpret_cast<const uint32_t*> (key);
+    auto last   = cursor + len / sizeof (uint32_t);
+    for (; cursor < last; ++cursor)
+        h = mix (h, *cursor);
+
+    // tail: the 1-3 bytes left after the last whole word, if any
+    if (len % sizeof (uint32_t)) {
+        h ^= murmur::tail (cursor, len);
+        h *= m;
+    }
+
+    // finalise
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+}
+
+
+//-----------------------------------------------------------------------------
+uint64_t
+util::hash::murmur2::hash_64 (const void *restrict key,
+                              size_t len,
+                              uint64_t seed)
+{
+    // setup
+    const auto m = constants<uint64_t>::m;
+    const auto r = constants<uint64_t>::r;
+
+    uint64_t h = seed ^ (len * m);
+
+    // body: every whole 64 bit word, read straight from the buffer
+    auto cursor = reinterpret_cast<const uint64_t*> (key);
+    auto last   = cursor + len / sizeof (uint64_t);
+    for (; cursor < last; ++cursor)
+        h = mix (h, *cursor);
+
+    // tail: the 1-7 bytes left after the last whole word, if any
+    if (len % sizeof (uint64_t)) {
+        h ^= murmur::tail (cursor, len);
+        h *= m;
+    }
+
+    // finalise
+    h ^= h >> r;
+    h *= m;
+    h ^= h >> r;
+
+    return h;
+}
+
+
diff --git a/hash/murmur/murmur2.hpp b/hash/murmur/murmur2.hpp
new file mode 100644
index 00000000..50973d8d
--- /dev/null
+++ b/hash/murmur/murmur2.hpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2015 Danny Robson
+ */
+
+#ifndef __UTIL_HASH_MURMUR_MURMUR2_HPP
+#define __UTIL_HASH_MURMUR_MURMUR2_HPP
+
+#include <cstddef>
+#include <cstdint>
+
+// Austin Appleby's MurmurHash2, and MurmurHash64A. The exhaustive list of
+// variants is deliberately not provided. You can damn well align your data or
+// fix the algorithm.
+namespace util { namespace hash { namespace murmur2 {
+    uint32_t mix (uint32_t, uint32_t);
+    uint64_t mix (uint64_t, uint64_t);
+
+    uint32_t hash_32 (const void *restrict data, size_t len, uint32_t seed);
+    uint64_t hash_64 (const void *restrict data, size_t len, uint64_t seed);
+} } }
+
+#endif
+
diff --git a/hash/murmur/murmur3.cpp b/hash/murmur/murmur3.cpp
new file mode 100644
index 00000000..b7d9a3a2
--- /dev/null
+++ b/hash/murmur/murmur3.cpp
@@ -0,0 +1,258 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2015 Danny Robson
+ */
+
+#include "murmur3.hpp"
+
+#include "common.hpp"
+#include "../../bitwise.hpp"
+
+#include <algorithm>
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Finalization mix - force all bits of a hash block to avalanche
+uint32_t
+util::hash::murmur3::mix (uint32_t h)
+{
+    h ^= h >> 16;
+    h *= 0x85ebca6b;
+    h ^= h >> 13;
+    h *= 0xc2b2ae35;
+    h ^= h >> 16;
+
+    return h;
+}
+
+
+//-----------------------------------------------------------------------------
+uint64_t
+util::hash::murmur3::mix (uint64_t k)           // 64 bit finalization mix
+{
+    k ^= k >> 33;
+    k *= 0xff51afd7ed558ccd;
+    k ^= k >> 33;
+    k *= 0xc4ceb9fe1a85ec53;
+    k ^= k >> 33;
+
+    return k;
+}
+
+
+//-----------------------------------------------------------------------------
+uint32_t
+util::hash::murmur3::hash_32(const void *restrict key,
+                             size_t len,
+                             uint32_t seed)
+{
+    auto data = reinterpret_cast<const uint8_t*> (key);
+    auto nblocks = len / sizeof (uint32_t);
+
+    uint32_t h1 = seed;
+
+    static const uint32_t c1 = 0xcc9e2d51;
+    static const uint32_t c2 = 0x1b873593;
+
+    //----------
+    // body: every whole 32 bit word, read straight from the buffer
+    auto cursor = reinterpret_cast<const uint32_t*> (data);
+    auto last = cursor + nblocks;
+    for (; cursor < last; ++cursor) {
+        uint32_t k1 = *cursor;
+
+        k1 *= c1;
+        k1  = rotatel (k1, 15);
+        k1 *= c2;
+        h1 ^= k1;
+
+        h1 = rotatel (h1, 13);
+        h1 += 0;                                // no-op; mirrors full_round's lane add
+        h1 = h1 * 5 + 0xe6546b64;
+    }
+
+    //----------
+    // tail: the 1-3 bytes left after the last whole word, if any
+    if (len % sizeof (uint32_t)) {
+        uint32_t k1 = 0 ^ murmur::tail (cursor, len);
+
+        k1 *= c1;
+        k1  = rotatel (k1, 15);
+        k1 *= c2;
+        h1 ^= k1;
+    }
+
+    //----------
+    // finalization
+
+    h1 ^= len;
+    h1  = mix (h1);
+
+    return h1;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+template <typename T>
+struct constants {                              // per-lane values for the 128 bit rounds
+    T c;                                        // multiplier applied to the key word
+    T Ks;                                       // left rotation of the key word
+    T Hs;                                       // left rotation of the hash word
+    uint32_t O;                                 // constant added after `h * 5`
+};
+
+
+//-----------------------------------------------------------------------------
+template <typename T>
+struct traits {
+    static constexpr size_t COMPONENTS = 16 / sizeof (T);   // words per 16 byte block
+    static const constants<T> X[COMPONENTS];
+};
+
+
+//-----------------------------------------------------------------------------
+template <>
+const constants<uint32_t>
+traits<uint32_t>::X[] = {
+    { 0x239b961b, 15, 19, 0x561ccd1b },
+    { 0xab0e9789, 16, 17, 0x0bcaa747 },
+    { 0x38b34ae5, 17, 15, 0x96cd1c35 },
+    { 0xa1e38b93, 18, 13, 0x32ac3b17 },
+};
+
+
+//-----------------------------------------------------------------------------
+template <>
+const constants<uint64_t>
+traits<uint64_t>::X[] = {
+    { 0x87c37b91114253d5, 31, 27, 0x52dce729 },
+    { 0x4cf5ad432745937f, 33, 31, 0x38495ab5 }
+};
+
+
+///////////////////////////////////////////////////////////////////////////////
+template <typename T>
+std::array<T,traits<T>::COMPONENTS>
+half_round (std::array<T,traits<T>::COMPONENTS> h,
+            std::array<T,traits<T>::COMPONENTS> k,
+            size_t i)
+{
+    auto COMPONENTS = traits<T>::COMPONENTS;
+    auto CONSTANTS  = traits<T>::X;
+
+    auto i_ = (i + 1) % COMPONENTS;             // next lane, wrapping around
+    k[i] *= CONSTANTS[i].c;
+    k[i]  = rotatel (k[i], CONSTANTS[i].Ks);
+    k[i] *= CONSTANTS[i_].c;
+    h[i] ^= k[i];
+    return h;
+}
+
+
+//-----------------------------------------------------------------------------
+template <typename T>
+std::array<T,traits<T>::COMPONENTS>
+full_round (std::array<T,traits<T>::COMPONENTS> h,
+            std::array<T,traits<T>::COMPONENTS> k,
+            size_t i)
+{
+    auto COMPONENTS = traits<T>::COMPONENTS;
+    auto CONSTANTS  = traits<T>::X;
+
+    h = half_round (h, k, i);
+
+    auto i_ = (i + 1) % COMPONENTS;             // next lane, wrapping around
+    h[i]  = rotatel (h[i], CONSTANTS[i].Hs);
+    h[i] += h[i_];
+    h[i]  = h[i] * 5 + CONSTANTS[i].O;
+
+    return h;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+template <typename T>
+std::array<T,traits<T>::COMPONENTS>
+hash_128 (const void *restrict key,
+          const size_t len,
+          const uint32_t seed)
+{
+    // Initialise the hash
+    static const size_t BLOCK_SIZE = 16;
+    using result_t = std::array<T,traits<T>::COMPONENTS>;
+
+    result_t h;
+    h.fill (seed);
+
+    // process the body. cursor steps in T words, so the end is scaled to words
+    auto cursor = reinterpret_cast<const T*> (key);
+    auto last   = cursor + (len / BLOCK_SIZE) * traits<T>::COMPONENTS;
+    for (; cursor < last; cursor += traits<T>::COMPONENTS) {
+        result_t k;
+        std::copy_n (cursor, traits<T>::COMPONENTS, k.begin ());
+
+        for (size_t i = 0; i < traits<T>::COMPONENTS; ++i)
+            h = full_round (h, k, i);
+    }
+
+    // process the tail
+    if (len % 16) {
+        auto k = util::hash::murmur::tail_array (cursor, len);
+
+        for (auto &v: k)
+            v = 0 ^ v;
+
+        for (size_t i = 0; i < traits<T>::COMPONENTS; ++i)
+            h = half_round (h, k, i);
+    }
+
+    // finalise the hash
+    for (auto &v: h)
+        v ^= len;
+
+    for (size_t i = 1; i < traits<T>::COMPONENTS; ++i)
+        h[0] += h[i];
+    for (size_t i = 1; i < traits<T>::COMPONENTS; ++i)
+        h[i] += h[0];
+
+    for (auto &v: h)
+        v = util::hash::murmur3::mix (v);
+
+    for (size_t i = 1; i < traits<T>::COMPONENTS; ++i)
+        h[0] += h[i];
+    for (size_t i = 1; i < traits<T>::COMPONENTS; ++i)
+        h[i] += h[0];
+
+    return h;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+std::array<uint32_t,4>
+util::hash::murmur3::hash_128_x86 (const void *restrict key,
+                                   const size_t len,
+                                   const uint32_t seed)
+{
+    return ::hash_128<uint32_t> (key, len, seed);
+}
+
+
+//-----------------------------------------------------------------------------
+std::array<uint64_t,2>
+util::hash::murmur3::hash_128_x64 (const void *restrict key,
+                                   size_t len,
+                                   const uint32_t seed)
+{
+    return ::hash_128<uint64_t> (key, len, seed);
+}
diff --git a/hash/murmur/murmur3.hpp b/hash/murmur/murmur3.hpp
new file mode 100644
index 00000000..3beec12a
--- /dev/null
+++ b/hash/murmur/murmur3.hpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Copyright 2015 Danny Robson
+ */
+
+#ifndef __UTIL_HASH_MURMUR_MURMUR3_HPP
+#define __UTIL_HASH_MURMUR_MURMUR3_HPP
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+
+// Austin Appleby's MurmurHash3
+namespace util { namespace hash { namespace murmur3 {
+    uint32_t mix (uint32_t);
+    uint64_t mix (uint64_t);
+
+    uint32_t hash_32 (const void *restrict data, size_t len, uint32_t seed);
+    std::array<uint32_t,4> hash_128_x86 (const void *restrict data, size_t len, uint32_t seed);
+    std::array<uint64_t,2> hash_128_x64 (const void *restrict data, size_t len, uint32_t seed);
+} } }
+
+#endif
+
diff --git a/test/hash/murmur.cpp b/test/hash/murmur.cpp
new file mode 100644
index 00000000..8a14553d
--- /dev/null
+++ b/test/hash/murmur.cpp
@@ -0,0 +1,130 @@
+#include "tap.hpp"
+
+#include "hash/murmur.hpp"
+#include "types.hpp"
+
+#include <cstring>
+
+
+void
+test (util::TAP::logger &tap)
+{
+    struct {
+        const char *key;
+
+        struct { uint32_t seed; uint32_t hash; } m1_32;
+        struct { uint32_t seed; uint32_t hash; } m2_32;
+        struct { uint64_t seed; uint64_t hash; } m2_64;
+        struct { uint32_t seed; uint32_t hash; } m3_32;
+
+        struct { uint32_t seed; std::array<uint32_t,4> hash; } m3_128_x86;
+        struct { uint32_t seed; std::array<uint64_t,2> hash; } m3_128_x64;
+
+    } TESTS[] = {                               // key, then { seed, expected } per variant
+        { "",
+            { 0, 0 },
+            { 0, 0 },
+            { 0, 0 },
+            { 0, 0 },
+            { 0, 0 },
+            { 0, 0 },
+        },
+
+        { "",
+            { 0x00000001, 0x8f5a8d63 },
+            { 0x00000001, 0x5bd15e36 },
+            { 1, 0xc6a4a7935bd064dc },
+            { 1, 0x514e28b7 },
+            { 1, { 0x88c4adec, 0x54d201b9, 0x54d201b9, 0x54d201b9 } },
+            { 1, { 0x4610abe56eff5cb5, 0x51622daa78f83583 } },
+        },
+
+        { "",
+            { 0xffffffff, 0x7a3f4f7e },
+            { 0xffffffff, 0xb35966b0 },
+            { uint64_t(-1), 0xb0d9485c2cd761b2 },
+            { 0xffffffff, 0x81f16f39 },
+            { 0xffffffff, { 0x051e08a9, 0x989d49f7, 0x989d49f7, 0x989d49f7 } },
+            { 0xffffffff, { 0x6af1df4d9d3bc9ec, 0x857421121ee6446b } },
+        },
+
+        { "a",
+            { 0x7a3f4f7e, 0x18abad09 },
+            { 0xb35966b0, 0x1eea8b10 },
+            { 0xb0d9485c2cd761b2, 0x0a9b4c93b35b1b9f },
+            { 0x81f16f39, 0x80d3460d },
+            { 0x051e08a9, { 0x08e91d27, 0x12c6d92a, 0x12c6d92a, 0x12c6d92a } },
+            { 0x9d3bc9ec, { 0xf79489c9f1a785de, 0xf6486d31835a9c7f } },
+        },
+        { "abc",
+            { 0x18abad09, 0x1defb5e9 },
+            { 0x1eea8b10, 0x72cac527 },
+            { 0x0a9b4c93b35b1b9f, 0x2ffdf3214d9a4fa4 },
+            { 0x80d3460d, 0x622f3384 },
+            { 0x08e91d27, { 0xc11cc883, 0xb5d7f69a, 0xb5d7f69a, 0xb5d7f69a } },
+            { 0xf1a785de, { 0x946e5ee63ce3b80e, 0xadb7d6d0e2558c3c } },
+        },
+        { "message digest",
+            { 0x1defb5e9, 0x7b3ea4bd },
+            { 0x72cac527, 0x68563c37 },
+            { 0x2ffdf3214d9a4fa4, 0x9a83e79336350cee },
+            { 0x622f3384, 0x6884feac },
+            { 0xc11cc883, { 0x9e3cc608, 0x90eba4c0, 0xc3775cf8, 0x10eb4fee } },
+            { 0x3ce3b80e, { 0x2c91b16326bf5f7f, 0xa21acf13c39485bc } },
+        },
+
+        { "abcdefghijklmnopqrstuvwxyz",
+            { 0x7b3ea4bd, 0xd94ee9ea },
+            { 0x68563c37, 0x0473b699 },
+            { 0x9a83e79336350cee, 0x1f256c898952ae12 },
+            { 0x6884feac, 0x99befd9e },
+            { 0x9e3cc608, { 0x7de6cb3c, 0x62268725, 0x7c9f0b06, 0x5d7e2281 } },
+            { 0x26bf5f7f, { 0xdbe75dee9ce0b3cb, 0x006c66d16f72b399, } },
+        }
+    };
+
+    bool m1_32 = true;                          // one pass/fail flag per hash variant
+    bool m2_32 = true;
+    bool m2_64 = true;
+    bool m3_32 = true;
+
+    bool m3_128_x86 = true;
+    bool m3_128_x64 = true;
+
+    for (const auto &t: TESTS) {
+        m1_32 = m1_32 && (t.m1_32.hash == util::hash::murmur1::hash_32 (t.key, strlen (t.key), t.m1_32.seed));
+        m2_32 = m2_32 && (t.m2_32.hash == util::hash::murmur2::hash_32 (t.key, strlen (t.key), t.m2_32.seed));
+        m2_64 = m2_64 && (t.m2_64.hash == util::hash::murmur2::hash_64 (t.key, strlen (t.key), t.m2_64.seed));
+        m3_32 = m3_32 && (t.m3_32.hash == util::hash::murmur3::hash_32 (t.key, strlen (t.key), t.m3_32.seed));
+
+        {
+            auto result = util::hash::murmur3::hash_128_x86 (t.key, strlen (t.key), t.m3_128_x86.seed);
+            bool success = t.m3_128_x86.hash == result;
+            m3_128_x86 = m3_128_x86 && success;
+        }
+
+        {
+            auto result = util::hash::murmur3::hash_128_x64 (t.key, strlen (t.key), t.m3_128_x64.seed);
+            bool success = t.m3_128_x64.hash == result;
+            m3_128_x64 = m3_128_x64 && success;
+        }
+    }
+
+    tap.expect (m1_32, "murmur1_32");
+    tap.expect (m2_32, "murmur2_32");
+    tap.expect (m2_64, "murmur2_64");
+    tap.expect (m3_32, "murmur3_32");
+    tap.expect (m3_128_x86, "murmur3_128_x86");
+    tap.expect (m3_128_x64, "murmur3_128_x64");
+}
+
+
+int
+main (void)
+{
+    util::TAP::logger tap;
+
+    test (tap);
+
+    return tap.status ();
+}