/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Copyright 2013 Danny Robson
 */

#include "emory/chunk/params.hpp"
#include "emory/chunk/find.hpp"
#include "emory/chunk/region.hpp"
#include "emory/chunk/match.hpp"
#include "emory/chunk/ostream.hpp"

// NOTE(review): the four directives below carry no header name in this copy
// of the file. The intended headers cannot be determined from here and must
// be restored before this will build.
#include
#include
#include
#include


///////////////////////////////////////////////////////////////////////////////
// Positions of the command line arguments within `argv`.
enum {
    ARG_SELF,       // argv[0]; echoed in the usage message
    ARG_BITS,       // parsed into params::bits
    ARG_WINDOW,     // parsed into params::window
    ARGS_MINIMUM,   // parsed into params::minimum
    ARGS_MAXIMUM,   // parsed into params::maximum
    ARGS_TARGET,    // path of the file whose chunks are searched for
    ARGS_SOURCE,    // first source path; every later argument is also a source

    NUM_ARGS,       // smallest acceptable argc (i.e. at least one source)
};


///////////////////////////////////////////////////////////////////////////////
// Chunks the target file, then chunks each source file in turn and records
// which target chunks also occur in a source. Every match is written to
// stdout, followed by a summary of how many target bytes were matched, and
// finally the number of bytes each source contributed is written to stderr.
//
// Returns EXIT_FAILURE if too few arguments were supplied.
//
// NOTE(review): several template argument lists (the `from_string` calls, the
// `target`/`source`/`found` vectors, and the lambda's `match` return type)
// are absent in this copy of the file. They are left exactly as found because
// the intended arguments cannot be confirmed from here.
int
main (int argc, char const **argv)
{
    if (argc < NUM_ARGS) {
        // Name every required argument, in the order given by the enum above.
        std::cerr
            << "usage: " << argv[ARG_SELF]
            << " <bits> <window> <minimum> <maximum> <target> <source> [<source>...]\n";
        return EXIT_FAILURE;
    }

    // The same chunking parameters are used for the target and every source.
    emory::chunk::params const p {
        .bits    = cruft::parse::from_string (argv[ARG_BITS    ]),
        .window  = cruft::parse::from_string (argv[ARG_WINDOW  ]),
        .minimum = cruft::parse::from_string (argv[ARGS_MINIMUM]),
        .maximum = cruft::parse::from_string (argv[ARGS_MAXIMUM]),
    };

    // Collect the target's chunks and sort them with the region's digest
    // ordering, the same ordering later applied to each source's chunks.
    std::clog << "Hashing target\n";
    std::vector target;
    emory::chunk::find (
        std::back_inserter (target),
        cruft::mapped_file (argv[ARGS_TARGET]),
        p
    );
    std::sort (
        target.begin (),
        target.end (),
        emory::chunk::region::digest_ordering {}
    );

    std::cout << "Found " << target.size () << " chunks\n";

    // Matches accumulated across all sources.
    std::vector> found;

    for (int i = ARGS_SOURCE; i != argc; ++i) {
        auto const path = argv[i];
        std::clog << "Hashing source: " << path << '\n';

        std::vector source;
        emory::chunk::find (
            std::back_inserter (source),
            cruft::mapped_file (path),
            p
        );
        std::sort (
            source.begin (),
            source.end (),
            emory::chunk::region::digest_ordering {}
        );

        std::clog << "Finding common\n";
        auto const &source_matches = common (target, source);
        std::clog << "Discovered " << source_matches.size () << " blocks\n";

        // Tag each match with the argv index of the file it came from, so
        // that contributions can be attributed per source further down.
        std::transform (
            std::begin (source_matches),
            std::end   (source_matches),
            std::back_inserter (found),
            [&] (auto const &j) -> emory::chunk::match
            {
                return {
                    .src = { i,           j.src.data },
                    .dst = { ARGS_TARGET, j.dst.data },
                };
            }
        );

        // NOTE(review): std::unique only collapses _adjacent_ elements that
        // compare equal. This therefore relies on `src_ordering` placing
        // matches that `dst_equality` considers equal next to one another --
        // confirm against the definitions of both predicates.
        std::sort (
            std::begin (found),
            std::end   (found),
            emory::chunk::match::src_ordering {}
        );

        found.erase (
            std::unique (
                std::begin (found),
                std::end   (found),
                emory::chunk::match::dst_equality {}
            ),
            found.end ()
        );
    }

    std::clog << "Finalising\n";

    std::size_t matching = 0;

    // One byte counter per source file, indexed by (argv index - ARGS_SOURCE).
    // The element type is spelled out: leaving it to be deduced from the
    // literal `0` would yield `int` counters for what are size_t byte counts.
    std::vector<std::size_t> source_bytes (argc - ARGS_SOURCE, 0);

    for (auto const &i: found) {
        std::cout << i << '\n';

        auto const size = i.dst.data.offset.second - i.dst.data.offset.first;
        matching += size;
        source_bytes[i.src.id - ARGS_SOURCE] += size;
    }

    // std::accumulate carries its running sum in the type of the initial
    // value, so the seed must be a std::size_t. Seeding with `0u` would sum
    // in `unsigned int` and narrow the total before it reaches `total`.
    std::size_t const total = std::accumulate (
        target.begin (),
        target.end (),
        std::size_t {0},
        [] (auto const &a, auto const &b)
        {
            return a + b.offset.second - b.offset.first;
        }
    );

    std::cout
        << "Found " << found.size () << " chunks. "
        << matching << "/" << total
        << " bytes for a factor of " << float (matching) / total
        << "\n";

    for (int i = ARGS_SOURCE; i != argc; ++i)
        std::cerr << argv[i] << " contributed: " << source_bytes[i - ARGS_SOURCE] << '\n';
}