analyse: avoid copying the chunk data for duplicate detection
This commit is contained in:
parent
00dc05ac8f
commit
b5e93c15fb
@ -220,18 +220,22 @@ int main (int argc, char const **argv)
|
|||||||
return accum + rhs.size ();
|
return accum + rhs.size ();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// WARNING: this is destructive, but suits our purposes for the moment as
|
||||||
std::vector<emory::chunk::region> unique;
|
// we intend to remove the reliance on in memory storage as much as we can.
|
||||||
std::unique_copy (
|
auto const init_size = src.size ();
|
||||||
|
src.erase(
|
||||||
|
std::unique (
|
||||||
src.begin (),
|
src.begin (),
|
||||||
src.end (),
|
src.end (),
|
||||||
std::back_inserter (unique),
|
|
||||||
region_equal
|
region_equal
|
||||||
|
),
|
||||||
|
src.end ()
|
||||||
);
|
);
|
||||||
|
|
||||||
auto const unique_bytes = std::accumulate (
|
auto const unique_bytes = std::accumulate (
|
||||||
unique.begin (),
|
src.begin (),
|
||||||
unique.end (),
|
src.end (),
|
||||||
0, [] (auto const accum, auto const rhs) { return accum + rhs.size (); }
|
0, [] (auto const &accum, auto const &rhs) { return accum + rhs.size (); }
|
||||||
);
|
);
|
||||||
|
|
||||||
auto const duplicated_bytes = total_bytes - unique_bytes;
|
auto const duplicated_bytes = total_bytes - unique_bytes;
|
||||||
@ -244,5 +248,5 @@ int main (int argc, char const **argv)
|
|||||||
100.f * duplicated_fraction
|
100.f * duplicated_fraction
|
||||||
);
|
);
|
||||||
|
|
||||||
fmt::print ("{} duplicates\n", src.size () - unique.size ());
|
fmt::print ("{} duplicates\n", init_size - src.size ());
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user