analyse: avoid copying the chunk data for duplicate detection

Danny Robson 2020-12-29 08:37:32 +10:00
parent 00dc05ac8f
commit b5e93c15fb


@@ -220,18 +220,22 @@ int main (int argc, char const **argv)
         return accum + rhs.size ();
     });
 
-    std::vector<emory::chunk::region> unique;
-    std::unique_copy (
-        src.begin (),
-        src.end (),
-        std::back_inserter (unique),
-        region_equal
+    // WARNING: this is destructive, but suits our purposes for the moment as
+    // we intend to remove the reliance on in memory storage as much as we can.
+    auto const init_size = src.size ();
+    src.erase(
+        std::unique (
+            src.begin (),
+            src.end (),
+            region_equal
+        ),
+        src.end ()
     );
 
     auto const unique_bytes = std::accumulate (
-        unique.begin (),
-        unique.end (),
-        0, [] (auto const accum, auto const rhs) { return accum + rhs.size (); }
+        src.begin (),
+        src.end (),
+        0, [] (auto const &accum, auto const &rhs) { return accum + rhs.size (); }
     );
 
     auto const duplicated_bytes = total_bytes - unique_bytes;
@@ -244,5 +248,5 @@ int main (int argc, char const **argv)
         100.f * duplicated_fraction
     );
 
-    fmt::print ("{} duplicates\n", src.size () - unique.size ());
+    fmt::print ("{} duplicates\n", init_size - src.size ());
 }
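
The change replaces std::unique_copy into a scratch vector with the erase-unique idiom, which compacts src in place and counts duplicates by comparing sizes before and after. Below is a minimal, self-contained sketch of that idiom; the int elements and sample values are hypothetical stand-ins for emory::chunk::region, not code from this repository.

#include <algorithm>
#include <cstdio>
#include <vector>

int main ()
{
    // Hypothetical sample data. std::unique only collapses *adjacent*
    // equal elements, so this idiom assumes the input is already sorted
    // or otherwise grouped.
    std::vector<int> src { 1, 1, 2, 3, 3, 3, 4 };

    // Capture the size up front: the compaction below is destructive,
    // which is exactly the caveat the commit's WARNING comment flags.
    auto const init_size = src.size ();

    // std::unique shifts the first element of each run of duplicates to
    // the front and returns an iterator past the unique range; erase then
    // drops the leftover tail. No second vector, no copied elements.
    src.erase (std::unique (src.begin (), src.end ()), src.end ());

    // Prints "3 duplicates" for the sample data above.
    std::printf ("%zu duplicates\n", init_size - src.size ());
}

The in-repository version additionally passes region_equal as the comparison predicate to std::unique, since equality of chunk regions is defined by the program rather than by operator==.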