/* * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * Copyright 2018 Danny Robson */ #pragma once #include #include #include #include namespace cruft { // a simplistic implementation of Lloyd's algorithm // // returns index of the closest output for each input template std::vector kmeans (cruft::view src, cruft::view dst, FunctionT const &&metric) { CHECK_GE (src.size (), dst.size ()); using coord_t = typename std::iterator_traits::value_type; const int iterations = 100; std::vector means (src.begin (), src.begin () + dst.size ()); std::vector accum (dst.size ()); std::vector count (dst.size ()); std::vector closest (src.size ()); for (auto i = 0; i < iterations; ++i) { std::fill (std::begin (accum), std::end (accum), 0); std::fill (std::begin (count), std::end (count), 0); for (auto const& [j,p]: iterator::izip (src)) { size_t bucket = 0; for (size_t k = 1; k < dst.size (); ++k) { if (metric (p, means[k]) < metric (p, means[bucket])) bucket = k; } accum[bucket] += p; count[bucket] += 1; closest[j] = bucket; } for (size_t j = 0; j < dst.size (); ++j) means[j] = accum[j] / count[j]; } std::copy (std::begin (means), std::end (means), std::begin (dst)); return closest; } template auto kmeans (InputT &&src, OutputT &&dst) { return kmeans ( std::forward (src), std::forward (dst), [] (auto a, auto b) { return norm (a - b); }); } }