kmeans: add naive kmeans impl
This commit is contained in:
parent
dcd789a075
commit
f31a344912
@ -303,6 +303,7 @@ list (
|
||||
json2/personality/rfc7519.hpp
|
||||
json2/tree.cpp
|
||||
json2/tree.hpp
|
||||
kmeans.hpp
|
||||
library.hpp
|
||||
log.cpp
|
||||
log.hpp
|
||||
@ -509,6 +510,7 @@ if (TESTS)
|
||||
job/queue
|
||||
json_types
|
||||
json2/event
|
||||
kmeans
|
||||
maths
|
||||
maths/fast
|
||||
matrix
|
||||
|
68
kmeans.hpp
Normal file
68
kmeans.hpp
Normal file
@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
* Copyright 2018 Danny Robson <danny@nerdcruft.net>
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "debug.hpp"
|
||||
#include "iterator.hpp"
|
||||
#include "point.hpp"
|
||||
|
||||
#include <iterator>
|
||||
|
||||
namespace util {
|
||||
// a simplistic implementation of Lloyd's algorithm
|
||||
//
|
||||
// returns index of the closest output for each input
|
||||
template <typename OutputT, typename InputT>
|
||||
std::vector<size_t>
|
||||
kmeans (util::view<InputT> src, util::view<OutputT> dst)
|
||||
{
|
||||
CHECK_GE (src.size (), dst.size ());
|
||||
|
||||
using coord_t = typename std::iterator_traits<InputT>::value_type;
|
||||
const int iterations = 100;
|
||||
|
||||
std::vector<coord_t> means (src.begin (), src.begin () + dst.size ());
|
||||
std::vector<coord_t> accum (dst.size ());
|
||||
std::vector<size_t> count (dst.size ());
|
||||
std::vector<size_t> closest (src.size ());
|
||||
|
||||
for (auto i = 0; i < iterations; ++i) {
|
||||
std::fill (std::begin (accum), std::end (accum), 0);
|
||||
std::fill (std::begin (count), std::end (count), 0);
|
||||
|
||||
for (auto const& [j,p]: util::izip (src)) {
|
||||
size_t bucket = 0;
|
||||
|
||||
for (size_t k = 1; k < dst.size (); ++k) {
|
||||
if (norm2 (p - means[k]) < norm2 (p - means[bucket]))
|
||||
bucket = k;
|
||||
}
|
||||
|
||||
accum[bucket] += p;
|
||||
count[bucket] += 1;
|
||||
closest[j] = bucket;
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < dst.size (); ++j)
|
||||
means[j] = accum[j] / count[j];
|
||||
}
|
||||
|
||||
std::copy (std::begin (means), std::end (means), std::begin (dst));
|
||||
|
||||
return closest;
|
||||
}
|
||||
}
|
35
test/kmeans.cpp
Normal file
35
test/kmeans.cpp
Normal file
@ -0,0 +1,35 @@
|
||||
#include "tap.hpp"
|
||||
|
||||
#include "kmeans.hpp"
|
||||
|
||||
#include <cruft/util/point.hpp>
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
int
|
||||
main ()
|
||||
{
|
||||
util::TAP::logger tap;
|
||||
|
||||
// create one point and check it 'converges' to this one point
|
||||
{
|
||||
const std::array<util::point3f,1> p { {{1,2,3}} };
|
||||
std::array<util::point3f,1> q;
|
||||
|
||||
util::kmeans (util::view{p}, util::view{q});
|
||||
tap.expect_eq (p, q, "single point, single k");
|
||||
}
|
||||
|
||||
// create two vectors, check if the mean converges to their average
|
||||
{
|
||||
const std::array<util::vector3f,2> p {{
|
||||
{1}, {2}
|
||||
}};
|
||||
std::array<util::vector3f,1> q;
|
||||
|
||||
util::kmeans (util::view{p}, util::view{q});
|
||||
tap.expect_eq (q[0], (p[0]+p[1])/2, "two point, single k");
|
||||
}
|
||||
|
||||
return tap.status ();
|
||||
}
|
Loading…
Reference in New Issue
Block a user