tools/compare: record the source of the matched blocks
This commit is contained in:
parent
f063bdfd8e
commit
df7b136f24
@ -32,7 +32,7 @@ namespace emory {
|
|||||||
static_hash::digest_t digest;
|
static_hash::digest_t digest;
|
||||||
|
|
||||||
struct digest_equality {
|
struct digest_equality {
|
||||||
bool operator() (chunk const &a, chunk const &b)
|
bool operator() (chunk const &a, chunk const &b) const
|
||||||
{
|
{
|
||||||
return a.digest == b.digest;
|
return a.digest == b.digest;
|
||||||
}
|
}
|
||||||
@ -40,7 +40,7 @@ namespace emory {
|
|||||||
|
|
||||||
|
|
||||||
struct digest_ordering {
|
struct digest_ordering {
|
||||||
bool operator() (chunk const &a, chunk const &b)
|
bool operator() (chunk const &a, chunk const &b) const
|
||||||
{
|
{
|
||||||
return a.digest < b.digest;
|
return a.digest < b.digest;
|
||||||
}
|
}
|
||||||
@ -50,23 +50,35 @@ namespace emory {
|
|||||||
|
|
||||||
template <typename IdT>
|
template <typename IdT>
|
||||||
struct match {
|
struct match {
|
||||||
struct {
|
struct side {
|
||||||
IdT id;
|
IdT id;
|
||||||
chunk data;
|
chunk data;
|
||||||
} src, dst;
|
} src, dst;
|
||||||
|
|
||||||
|
|
||||||
struct src_equality {
|
struct src_equality {
|
||||||
bool operator() (match const &a, match const &b)
|
bool operator() (match const &a, match const &b) const
|
||||||
{
|
{
|
||||||
return a.src == b.src;
|
return a.src == b.src;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct src_ordering {
|
struct dst_equality {
|
||||||
bool operator() (match const &a, match const &b)
|
bool operator() (match const &a, match const &b) const
|
||||||
{
|
{
|
||||||
return a.src < b.src;
|
return chunk::digest_equality {} (
|
||||||
|
a.dst.data,
|
||||||
|
b.dst.data
|
||||||
|
);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct src_ordering {
|
||||||
|
bool operator() (match const &a, match const &b) const
|
||||||
|
{
|
||||||
|
return chunk::digest_ordering {} (a.src.data, b.src.data);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
@ -189,7 +201,7 @@ int main (int argc, char const **argv)
|
|||||||
emory::map target (cruft::mapped_file (argv[ARGS_TARGET]), p);
|
emory::map target (cruft::mapped_file (argv[ARGS_TARGET]), p);
|
||||||
std::sort (target.elements.begin (), target.elements.end (), emory::chunk::digest_ordering {});
|
std::sort (target.elements.begin (), target.elements.end (), emory::chunk::digest_ordering {});
|
||||||
|
|
||||||
std::vector<emory::chunk> found;
|
std::vector<emory::match<int>> found;
|
||||||
|
|
||||||
for (int i = ARGS_SOURCE; i != argc; ++i) {
|
for (int i = ARGS_SOURCE; i != argc; ++i) {
|
||||||
auto const path = argv[i];
|
auto const path = argv[i];
|
||||||
@ -205,21 +217,39 @@ int main (int argc, char const **argv)
|
|||||||
std::begin (source_matches),
|
std::begin (source_matches),
|
||||||
std::end (source_matches),
|
std::end (source_matches),
|
||||||
std::back_inserter (found),
|
std::back_inserter (found),
|
||||||
[] (auto const &i) { return i.src.data; }
|
[&] (auto const &j) -> emory::match<int>
|
||||||
|
{
|
||||||
|
return {
|
||||||
|
.src = { i, j.src.data },
|
||||||
|
.dst = { ARGS_TARGET, j.dst.data },
|
||||||
|
};
|
||||||
|
});
|
||||||
|
|
||||||
|
std::sort (
|
||||||
|
std::begin (found),
|
||||||
|
std::end (found),
|
||||||
|
emory::match<int>::src_ordering {}
|
||||||
);
|
);
|
||||||
|
|
||||||
std::sort (std::begin (found), std::end (found), emory::chunk::digest_ordering {});
|
|
||||||
found.erase (
|
found.erase (
|
||||||
std::unique (std::begin (found), std::end (found), emory::chunk::digest_equality {}),
|
std::unique (
|
||||||
|
std::begin (found),
|
||||||
|
std::end (found),
|
||||||
|
emory::match<int>::dst_equality {}
|
||||||
|
),
|
||||||
found.end ()
|
found.end ()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
std::clog << "Finalising\n";
|
std::clog << "Finalising\n";
|
||||||
std::size_t matching = 0;
|
std::size_t matching = 0;
|
||||||
|
std::vector<std::size_t> source_bytes (argc - ARGS_SOURCE, 0);
|
||||||
for (auto const &i: found) {
|
for (auto const &i: found) {
|
||||||
std::cout << i << '\n';
|
std::cout << i.dst.data << '\n';
|
||||||
matching += i.offset.second - i.offset.first;
|
auto const size = i.dst.data.offset.second - i.dst.data.offset.first;
|
||||||
|
matching += size;
|
||||||
|
source_bytes[i.src.id - ARGS_SOURCE] += size;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::size_t const total = std::accumulate (
|
std::size_t const total = std::accumulate (
|
||||||
@ -230,4 +260,7 @@ int main (int argc, char const **argv)
|
|||||||
);
|
);
|
||||||
|
|
||||||
std::cout << "Found " << found.size () << " chunks. " << matching << "/" << total << " bytes for a factor of " << float (matching) / total << "\n";
|
std::cout << "Found " << found.size () << " chunks. " << matching << "/" << total << " bytes for a factor of " << float (matching) / total << "\n";
|
||||||
|
|
||||||
|
for (int i = ARGS_SOURCE; i != argc; ++i)
|
||||||
|
std::cerr << argv[i] << " contributed: " << source_bytes[i - ARGS_SOURCE] << '\n';
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user