2018-04-18 21:48:24 +10:00
|
|
|
/*
|
2018-08-04 15:14:06 +10:00
|
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
2018-04-18 21:48:24 +10:00
|
|
|
*
|
|
|
|
* Copyright 2018 Danny Robson <danny@nerdcruft.net>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include "debug.hpp"
|
|
|
|
#include "iterator.hpp"
|
|
|
|
#include "point.hpp"
|
|
|
|
|
|
|
|
#include <iterator>
|
|
|
|
|
|
|
|
namespace util {
|
|
|
|
// a simplistic implementation of Lloyd's algorithm
|
|
|
|
//
|
|
|
|
// returns index of the closest output for each input
|
2018-04-23 23:19:14 +10:00
|
|
|
template <typename OutputT, typename InputT, typename FunctionT>
|
2018-04-18 21:48:24 +10:00
|
|
|
std::vector<size_t>
|
2018-04-23 23:19:14 +10:00
|
|
|
kmeans (util::view<InputT> src, util::view<OutputT> dst, FunctionT const &&metric)
|
2018-04-18 21:48:24 +10:00
|
|
|
{
|
|
|
|
CHECK_GE (src.size (), dst.size ());
|
|
|
|
|
|
|
|
using coord_t = typename std::iterator_traits<InputT>::value_type;
|
|
|
|
const int iterations = 100;
|
|
|
|
|
|
|
|
std::vector<coord_t> means (src.begin (), src.begin () + dst.size ());
|
|
|
|
std::vector<coord_t> accum (dst.size ());
|
|
|
|
std::vector<size_t> count (dst.size ());
|
|
|
|
std::vector<size_t> closest (src.size ());
|
|
|
|
|
|
|
|
for (auto i = 0; i < iterations; ++i) {
|
|
|
|
std::fill (std::begin (accum), std::end (accum), 0);
|
|
|
|
std::fill (std::begin (count), std::end (count), 0);
|
|
|
|
|
|
|
|
for (auto const& [j,p]: util::izip (src)) {
|
|
|
|
size_t bucket = 0;
|
|
|
|
|
|
|
|
for (size_t k = 1; k < dst.size (); ++k) {
|
2018-04-23 23:19:14 +10:00
|
|
|
if (metric (p, means[k]) < metric (p, means[bucket]))
|
2018-04-18 21:48:24 +10:00
|
|
|
bucket = k;
|
|
|
|
}
|
|
|
|
|
|
|
|
accum[bucket] += p;
|
|
|
|
count[bucket] += 1;
|
|
|
|
closest[j] = bucket;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t j = 0; j < dst.size (); ++j)
|
|
|
|
means[j] = accum[j] / count[j];
|
|
|
|
}
|
|
|
|
|
|
|
|
std::copy (std::begin (means), std::end (means), std::begin (dst));
|
|
|
|
|
|
|
|
return closest;
|
|
|
|
}
|
2018-04-23 23:19:14 +10:00
|
|
|
|
|
|
|
|
|
|
|
// Convenience overload of kmeans that supplies a default metric.
//
// Forwards `src` and `dst` unchanged to the three-argument kmeans, passing a
// metric that evaluates `norm (a - b)` for a point `a` and a centroid `b`.
// `norm` is called unqualified, so it must be findable for the point type.
//
// Returns whatever the three-argument kmeans returns.
template <typename OutputT, typename InputT>
auto
kmeans (InputT &&src, OutputT &&dst)
{
    return kmeans (
        std::forward<InputT> (src),
        std::forward<OutputT> (dst),
        // Take the operands by const reference: the metric is evaluated for
        // every point/centroid pair in every round, so by-value parameters
        // would copy both points each time.
        [] (auto const &a, auto const &b) {
            return norm (a - b);
        });
}
|
|
|
|
|
2018-04-18 21:48:24 +10:00
|
|
|
}
|