analyse: print duplicated count and total size
This commit is contained in:
parent
35a8420d28
commit
9b6e1e770f
@ -80,7 +80,73 @@ int main (int argc, char const **argv)
|
|||||||
};
|
};
|
||||||
|
|
||||||
cruft::mapped_file data (argv[ARGS_INPUT]);
|
cruft::mapped_file data (argv[ARGS_INPUT]);
|
||||||
|
std::cout << "size: " << data.size () << '\n';
|
||||||
|
|
||||||
|
std::cout << "processing\n";
|
||||||
emory::chunk::map src (data, p);
|
emory::chunk::map src (data, p);
|
||||||
|
|
||||||
|
std::cout << "validating\n";
|
||||||
std::cout << src.size () << " chunks\n";
|
std::cout << src.size () << " chunks\n";
|
||||||
|
std::sort (
|
||||||
|
src.elements.begin (),
|
||||||
|
src.elements.end (),
|
||||||
|
[] (auto const &a, auto const &b) { return a.offset.first < b.offset.first; }
|
||||||
|
);
|
||||||
|
for (int i = 0, cursor = 0; i < std::ssize (src.elements); ++i) {
|
||||||
|
if (src.elements[i].offset.first != cursor) {
|
||||||
|
std::cout << "non-overlapping chunks\n";
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
cursor = src.elements[i].offset.second;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (src.elements.back ().offset.second != std::ssize (data)) {
|
||||||
|
std::cout << "invalid total size\n";
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort (
|
||||||
|
src.elements.begin (),
|
||||||
|
src.elements.end (),
|
||||||
|
region_less
|
||||||
|
);
|
||||||
|
|
||||||
|
std::vector<off64_t> sizes;
|
||||||
|
std::transform (
|
||||||
|
src.elements.begin (),
|
||||||
|
src.elements.end (),
|
||||||
|
std::back_inserter (sizes),
|
||||||
|
[] (auto const &val) { return val.size (); }
|
||||||
|
);
|
||||||
|
auto const myaccum = std::accumulate (std::begin (sizes), std::end (sizes), 0);
|
||||||
|
std::cout << myaccum << '\n';
|
||||||
|
|
||||||
|
auto const total_bytes = std::accumulate (
|
||||||
|
src.elements.begin (),
|
||||||
|
src.elements.end (),
|
||||||
|
0,
|
||||||
|
[] (auto const accum, auto const rhs)
|
||||||
|
{
|
||||||
|
return accum + rhs.size ();
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<emory::chunk::region> unique;
|
||||||
|
std::unique_copy (
|
||||||
|
src.elements.begin (),
|
||||||
|
src.elements.end (),
|
||||||
|
std::back_inserter (unique),
|
||||||
|
region_equal
|
||||||
|
);
|
||||||
|
auto const unique_bytes = std::accumulate (
|
||||||
|
unique.begin (),
|
||||||
|
unique.end (),
|
||||||
|
0, [] (auto const accum, auto const rhs) { return accum + rhs.size (); }
|
||||||
|
);
|
||||||
|
|
||||||
|
auto const duplicated_bytes = total_bytes - unique_bytes;
|
||||||
|
float const duplicated_fraction = float (duplicated_bytes) / total_bytes;
|
||||||
|
|
||||||
|
std::cout << duplicated_bytes << " duplicated bytes of " << total_bytes << " (" << duplicated_fraction << "%)\n";
|
||||||
|
std::cout << (src.elements.size () - unique.size ()) << " duplicates\n";
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user