analyse: avoid copying the chunk data for duplicate detection
This commit is contained in:
parent
00dc05ac8f
commit
b5e93c15fb
@ -220,18 +220,22 @@ int main (int argc, char const **argv)
|
||||
return accum + rhs.size ();
|
||||
});
|
||||
|
||||
// WARNING: this is destructive, but suits our purposes for the moment as
|
||||
// we intend to remove the reliance on in-memory storage as much as we can.
|
||||
auto const init_size = src.size ();
|
||||
src.erase(
|
||||
std::unique (
|
||||
src.begin (),
|
||||
src.end (),
|
||||
region_equal
|
||||
),
|
||||
src.end ()
|
||||
);
|
||||
|
||||
std::vector<emory::chunk::region> unique;
|
||||
std::unique_copy (
|
||||
auto const unique_bytes = std::accumulate (
|
||||
src.begin (),
|
||||
src.end (),
|
||||
std::back_inserter (unique),
|
||||
region_equal
|
||||
);
|
||||
auto const unique_bytes = std::accumulate (
|
||||
unique.begin (),
|
||||
unique.end (),
|
||||
0, [] (auto const accum, auto const rhs) { return accum + rhs.size (); }
|
||||
0, [] (auto const &accum, auto const &rhs) { return accum + rhs.size (); }
|
||||
);
|
||||
|
||||
auto const duplicated_bytes = total_bytes - unique_bytes;
|
||||
@ -244,5 +248,5 @@ int main (int argc, char const **argv)
|
||||
100.f * duplicated_fraction
|
||||
);
|
||||
|
||||
fmt::print ("{} duplicates\n", src.size () - unique.size ());
|
||||
fmt::print ("{} duplicates\n", init_size - src.size ());
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user