From b5e93c15fb1c1188f6b2bf94fe2ef98ba34e6574 Mon Sep 17 00:00:00 2001 From: Danny Robson Date: Tue, 29 Dec 2020 08:37:32 +1000 Subject: [PATCH] analyse: avoid copying the chunk data for duplicate detection --- tools/analyse.cpp | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/tools/analyse.cpp b/tools/analyse.cpp index effce36..7ddfdfa 100644 --- a/tools/analyse.cpp +++ b/tools/analyse.cpp @@ -220,18 +220,22 @@ int main (int argc, char const **argv) return accum + rhs.size (); }); + // WARNING: this is destructive, but suits our purposes for the moment as + // we intend to remove the reliance on in-memory storage as much as we can. + auto const init_size = src.size (); + src.erase( + std::unique ( + src.begin (), + src.end (), + region_equal + ), + src.end () + ); - std::vector unique; - std::unique_copy ( + auto const unique_bytes = std::accumulate ( src.begin (), src.end (), - std::back_inserter (unique), - region_equal - ); - auto const unique_bytes = std::accumulate ( - unique.begin (), - unique.end (), - 0, [] (auto const accum, auto const rhs) { return accum + rhs.size (); } + 0, [] (auto const &accum, auto const &rhs) { return accum + rhs.size (); } ); auto const duplicated_bytes = total_bytes - unique_bytes; @@ -244,5 +248,5 @@ int main (int argc, char const **argv) 100.f * duplicated_fraction ); - fmt::print ("{} duplicates\n", src.size () - unique.size ()); + fmt::print ("{} duplicates\n", init_size - src.size ()); }