/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2018 Danny Robson */ #pragma once #include "debug.hpp" #include "iterator.hpp" #include "point.hpp" #include namespace util { // a simplistic implementation of Lloyd's algorithm // // returns index of the closest output for each input template std::vector kmeans (util::view src, util::view dst, FunctionT const &&metric) { CHECK_GE (src.size (), dst.size ()); using coord_t = typename std::iterator_traits::value_type; const int iterations = 100; std::vector means (src.begin (), src.begin () + dst.size ()); std::vector accum (dst.size ()); std::vector count (dst.size ()); std::vector closest (src.size ()); for (auto i = 0; i < iterations; ++i) { std::fill (std::begin (accum), std::end (accum), 0); std::fill (std::begin (count), std::end (count), 0); for (auto const& [j,p]: util::izip (src)) { size_t bucket = 0; for (size_t k = 1; k < dst.size (); ++k) { if (metric (p, means[k]) < metric (p, means[bucket])) bucket = k; } accum[bucket] += p; count[bucket] += 1; closest[j] = bucket; } for (size_t j = 0; j < dst.size (); ++j) means[j] = accum[j] / count[j]; } std::copy (std::begin (means), std::end (means), std::begin (dst)); return closest; } template auto kmeans (InputT &&src, OutputT &&dst) { return kmeans ( std::forward (src), std::forward (dst), [] (auto a, auto b) { return norm (a - b); }); } }