tools/compare: allow multiple sources
This commit is contained in:
parent
fd3f1bfc34
commit
f063bdfd8e
@ -39,7 +39,7 @@ namespace emory {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct digest_comparator {
|
struct digest_ordering {
|
||||||
bool operator() (chunk const &a, chunk const &b)
|
bool operator() (chunk const &a, chunk const &b)
|
||||||
{
|
{
|
||||||
return a.digest < b.digest;
|
return a.digest < b.digest;
|
||||||
@ -48,6 +48,30 @@ namespace emory {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <typename IdT>
|
||||||
|
struct match {
|
||||||
|
struct {
|
||||||
|
IdT id;
|
||||||
|
chunk data;
|
||||||
|
} src, dst;
|
||||||
|
|
||||||
|
struct src_equality {
|
||||||
|
bool operator() (match const &a, match const &b)
|
||||||
|
{
|
||||||
|
return a.src == b.src;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct src_ordering {
|
||||||
|
bool operator() (match const &a, match const &b)
|
||||||
|
{
|
||||||
|
return a.src < b.src;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
std::ostream& operator<< (std::ostream &os, chunk const &val)
|
std::ostream& operator<< (std::ostream &os, chunk const &val)
|
||||||
{
|
{
|
||||||
return os << "{ first: 0x" << std::setw (8) << std::setfill ('0') << std::hex << val.offset.first
|
return os << "{ first: 0x" << std::setw (8) << std::setfill ('0') << std::hex << val.offset.first
|
||||||
@ -62,7 +86,7 @@ namespace emory {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
std::vector<chunk> common (map const &a, map const &b);
|
std::vector<match<map const*>> common (map const &a, map const &b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -100,31 +124,36 @@ emory::map::map (cruft::view<u08 const *> data, const emory::params &p)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::vector<emory::chunk>
|
std::vector<
|
||||||
emory::common (emory::map const &a, emory::map const &b)
|
emory::match<emory::map const*>
|
||||||
|
>
|
||||||
|
emory::common (emory::map const &src, emory::map const &dst)
|
||||||
{
|
{
|
||||||
CHECK (std::is_sorted (a.elements.begin (), a.elements.end (), emory::chunk::digest_comparator {}));
|
CHECK (std::is_sorted (src.elements.begin (), src.elements.end (), emory::chunk::digest_ordering {}));
|
||||||
CHECK (std::is_sorted (b.elements.begin (), b.elements.end (), emory::chunk::digest_comparator {}));
|
CHECK (std::is_sorted (dst.elements.begin (), dst.elements.end (), emory::chunk::digest_ordering {}));
|
||||||
|
|
||||||
std::vector<emory::chunk> res;
|
std::vector<emory::match<emory::map const*>> res;
|
||||||
|
|
||||||
for (auto a_cursor = a.elements.begin (), b_cursor = b.elements.begin ();
|
for (auto src_cursor = src.elements.begin (), dst_cursor = dst.elements.begin ();
|
||||||
a_cursor != a.elements.end () && b_cursor != b.elements.end ();
|
src_cursor != src.elements.end () && dst_cursor != dst.elements.end ();
|
||||||
/* nothing */)
|
/* nothing */)
|
||||||
{
|
{
|
||||||
if (a_cursor->digest < b_cursor->digest) {
|
if (src_cursor->digest < dst_cursor->digest) {
|
||||||
++a_cursor;
|
++src_cursor;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (b_cursor->digest < a_cursor->digest) {
|
if (dst_cursor->digest < src_cursor->digest) {
|
||||||
++b_cursor;
|
++dst_cursor;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
res.push_back (*a_cursor);
|
res.push_back ({
|
||||||
++a_cursor;
|
.src = { .id = &src, .data = *src_cursor },
|
||||||
++b_cursor;
|
.dst = { .id = &dst, .data = *dst_cursor },
|
||||||
|
});
|
||||||
|
++src_cursor;
|
||||||
|
++dst_cursor;
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
@ -145,8 +174,8 @@ enum {
|
|||||||
|
|
||||||
int main (int argc, char const **argv)
|
int main (int argc, char const **argv)
|
||||||
{
|
{
|
||||||
if (argc != NUM_ARGS) {
|
if (argc < NUM_ARGS) {
|
||||||
std::cerr << "usage: " << argv[ARG_SELF] << " <bits> <window> <minimum> <target> <source>\n";
|
std::cerr << "usage: " << argv[ARG_SELF] << " <bits> <window> <minimum> <target> <source> [...]\n";
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -156,16 +185,37 @@ int main (int argc, char const **argv)
|
|||||||
.minimum = cruft::parse::from_string<std::ptrdiff_t> (argv[ARGS_MINIMUM]),
|
.minimum = cruft::parse::from_string<std::ptrdiff_t> (argv[ARGS_MINIMUM]),
|
||||||
};
|
};
|
||||||
|
|
||||||
emory::map target (cruft::mapped_file (argv[ARGS_TARGET]), p);
|
|
||||||
emory::map source (cruft::mapped_file (argv[ARGS_SOURCE]), p);
|
|
||||||
|
|
||||||
std::clog << "Hashing target\n";
|
std::clog << "Hashing target\n";
|
||||||
std::sort (target.elements.begin (), target.elements.end (), emory::chunk::digest_comparator {});
|
emory::map target (cruft::mapped_file (argv[ARGS_TARGET]), p);
|
||||||
std::clog << "Hashing source\n";
|
std::sort (target.elements.begin (), target.elements.end (), emory::chunk::digest_ordering {});
|
||||||
std::sort (source.elements.begin (), source.elements.end (), emory::chunk::digest_comparator {});
|
|
||||||
|
std::vector<emory::chunk> found;
|
||||||
|
|
||||||
|
for (int i = ARGS_SOURCE; i != argc; ++i) {
|
||||||
|
auto const path = argv[i];
|
||||||
|
std::clog << "Hashing source: " << path << '\n';
|
||||||
|
emory::map source (cruft::mapped_file (path), p);
|
||||||
|
std::sort (source.elements.begin (), source.elements.end (), emory::chunk::digest_ordering {});
|
||||||
|
|
||||||
std::clog << "Finding common\n";
|
std::clog << "Finding common\n";
|
||||||
auto const &found = common (target, source);
|
auto const &source_matches = common (target, source);
|
||||||
|
std::clog << "Discovered " << source_matches.size () << " blocks\n";
|
||||||
|
|
||||||
|
std::transform (
|
||||||
|
std::begin (source_matches),
|
||||||
|
std::end (source_matches),
|
||||||
|
std::back_inserter (found),
|
||||||
|
[] (auto const &i) { return i.src.data; }
|
||||||
|
);
|
||||||
|
|
||||||
|
std::sort (std::begin (found), std::end (found), emory::chunk::digest_ordering {});
|
||||||
|
found.erase (
|
||||||
|
std::unique (std::begin (found), std::end (found), emory::chunk::digest_equality {}),
|
||||||
|
found.end ()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::clog << "Finalising\n";
|
||||||
std::size_t matching = 0;
|
std::size_t matching = 0;
|
||||||
for (auto const &i: found) {
|
for (auto const &i: found) {
|
||||||
std::cout << i << '\n';
|
std::cout << i << '\n';
|
||||||
|
Loading…
Reference in New Issue
Block a user