emory: break comparison tool into distinct units
This commit is contained in:
parent
ff348b4484
commit
54c466f5e5
@ -14,6 +14,18 @@ include_directories(.)
|
|||||||
# Sources for the `emory` library target. Each chunking unit contributes a
# translation unit and its matching header.
add_library(emory
    acl.cpp                  acl.hpp
    emory/chunk/fwd.cpp      emory/chunk/fwd.hpp
    emory/chunk/map.cpp      emory/chunk/map.hpp
    emory/chunk/match.cpp    emory/chunk/match.hpp
    emory/chunk/ostream.cpp  emory/chunk/ostream.hpp
    emory/chunk/params.cpp   emory/chunk/params.hpp
    emory/chunk/region.cpp   emory/chunk/region.hpp
)
|
||||||
|
|
||||||
|
|
||||||
|
9
emory/chunk/fwd.cpp
Normal file
9
emory/chunk/fwd.cpp
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
/*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* Copyright 2019, Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "fwd.hpp"
|
17
emory/chunk/fwd.hpp
Normal file
17
emory/chunk/fwd.hpp
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
/*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* Copyright 2019, Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
/// Forward declarations of the chunking types, for headers that only need
/// to name them (by pointer or reference) without pulling in the full
/// definitions.
namespace emory::chunk {
    struct params;
    struct region;
    struct map;

    template <typename IdT>
    struct match;
}
|
50
emory/chunk/map.cpp
Normal file
50
emory/chunk/map.cpp
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* Copyright 2019, Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "map.hpp"
|
||||||
|
|
||||||
|
#include "params.hpp"
|
||||||
|
|
||||||
|
#include <cruft/util/hash/buzhash.hpp>
|
||||||
|
|
||||||
|
using emory::chunk::map;
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
/// Split `data` into regions at positions selected by a rolling hash and
/// append them to `elements`.
///
/// A position closes the current region when at least `p.minimum` bytes lie
/// between the region start and the position, and the low `p.bits` bits of
/// the rolling digest are all zero. Each stored region carries its begin/end
/// distances from `data.begin ()` and the `static_hash` digest of its bytes.
///
/// Bytes that follow the last detected boundary are not emitted as a region.
///
/// \param data  the buffer to chunk
/// \param p     chunking parameters (`bits`, `window`, `minimum`)
map::map (cruft::view<u08 const *> data, const emory::chunk::params &p)
{
    using hash_type   = cruft::hash::buzhash<u64>;
    using digest_type = hash_type::digest_type;

    // NOTE(review): presumably this primes the hash with the first
    // `p.window` bytes, and `consume` yields the bytes after them; the
    // behaviour when `data` is shorter than `p.window` is not visible here.
    hash_type h (p.window, data);
    auto remain = data.consume (p.window);

    // Mask selecting the low `p.bits` bits of the digest. Shifting by the
    // full width of the type (bits == 0) or by a wrapped-around amount
    // (bits > width) is undefined, so both extremes are handled explicitly:
    // zero bits means every candidate position is a boundary, and anything
    // at or above the digest width selects every bit.
    constexpr std::size_t digest_bits = sizeof (digest_type) * 8;
    digest_type const mask =
        p.bits == 0           ? digest_type (0) :
        p.bits >= digest_bits ? ~digest_type (0) :
        digest_type (~digest_type (0) >> (digest_bits - p.bits));

    for (u08 const *cursor = remain.begin (), *start = data.begin (); cursor != remain.end (); cursor++) {
        // The hash is fed every position, including those that are skipped
        // below, so that its state follows the cursor.
        auto const digest = h (cursor);

        // Too close to the start of the current region.
        if (std::distance (start, cursor) < p.minimum)
            continue;

        // Any selected bit set means this is not a boundary.
        if (unlikely (digest & mask))
            continue;

        // NOTE(review): assuming cruft::view is half-open, the byte at
        // `cursor` is in neither this region nor the next one, because the
        // next region starts at `cursor + 1`. Confirm this is intended.
        cruft::view<u08 const*> const region { start, cursor };
        start = cursor + 1;

        elements.push_back ({
            .offset = {
                std::pair<std::size_t,std::size_t> {
                    std::distance (data.begin (), region.begin ()),
                    std::distance (data.begin (), region.end ())
                },
            },
            .digest = static_hash {} (region),
        });
    }
}
|
25
emory/chunk/map.hpp
Normal file
25
emory/chunk/map.hpp
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
/*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* Copyright 2019, Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "fwd.hpp"
|
||||||
|
|
||||||
|
#include "region.hpp"
|
||||||
|
|
||||||
|
#include <cruft/util/view.hpp>
|
||||||
|
#include <cruft/util/std.hpp>
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace emory::chunk {
|
||||||
|
struct map {
|
||||||
|
map (cruft::view<u08 const*> data, params const&);
|
||||||
|
std::vector<region> elements;
|
||||||
|
};
|
||||||
|
}
|
46
emory/chunk/match.cpp
Normal file
46
emory/chunk/match.cpp
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
/*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* Copyright 2019, Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "match.hpp"
|
||||||
|
|
||||||
|
#include "map.hpp"
|
||||||
|
|
||||||
|
std::vector<
|
||||||
|
emory::chunk::match<emory::chunk::map const*>
|
||||||
|
>
|
||||||
|
emory::chunk::common (map const &src, map const &dst)
|
||||||
|
{
|
||||||
|
CHECK (std::is_sorted (src.elements.begin (), src.elements.end (), region::digest_ordering {}));
|
||||||
|
CHECK (std::is_sorted (dst.elements.begin (), dst.elements.end (), region::digest_ordering {}));
|
||||||
|
|
||||||
|
std::vector<match<map const*>> res;
|
||||||
|
|
||||||
|
for (auto src_cursor = src.elements.begin (), dst_cursor = dst.elements.begin ();
|
||||||
|
src_cursor != src.elements.end () && dst_cursor != dst.elements.end ();
|
||||||
|
/* nothing */)
|
||||||
|
{
|
||||||
|
if (src_cursor->digest < dst_cursor->digest) {
|
||||||
|
++src_cursor;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dst_cursor->digest < src_cursor->digest) {
|
||||||
|
++dst_cursor;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
res.push_back ({
|
||||||
|
.src = { .id = &src, .data = *src_cursor },
|
||||||
|
.dst = { .id = &dst, .data = *dst_cursor },
|
||||||
|
});
|
||||||
|
++src_cursor;
|
||||||
|
++dst_cursor;
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
56
emory/chunk/match.hpp
Normal file
56
emory/chunk/match.hpp
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
/*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* Copyright 2019, Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "fwd.hpp"
|
||||||
|
#include "region.hpp"
|
||||||
|
|
||||||
|
|
||||||
|
namespace emory::chunk {
|
||||||
|
template <typename IdT>
|
||||||
|
struct match {
|
||||||
|
struct side {
|
||||||
|
IdT id;
|
||||||
|
region data;
|
||||||
|
};
|
||||||
|
|
||||||
|
side src, dst;
|
||||||
|
|
||||||
|
|
||||||
|
struct src_equality {
|
||||||
|
bool operator() (match const &a, match const &b) const
|
||||||
|
{
|
||||||
|
return a.src == b.src;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct dst_equality {
|
||||||
|
bool operator() (match const &a, match const &b) const
|
||||||
|
{
|
||||||
|
return region::digest_equality {} (
|
||||||
|
a.dst.data,
|
||||||
|
b.dst.data
|
||||||
|
);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct src_ordering {
|
||||||
|
bool operator() (match const &a, match const &b) const
|
||||||
|
{
|
||||||
|
return region::digest_ordering {} (a.src.data, b.src.data);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
std::vector<match<map const*>>
|
||||||
|
common (map const &a, map const &b);
|
||||||
|
}
|
39
emory/chunk/ostream.cpp
Normal file
39
emory/chunk/ostream.cpp
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
/*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* Copyright 2019, Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "ostream.hpp"
|
||||||
|
|
||||||
|
#include "region.hpp"
|
||||||
|
#include "match.hpp"
|
||||||
|
|
||||||
|
#include <ostream>
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
std::ostream&
|
||||||
|
emory::chunk::operator<< (std::ostream &os, region const &val)
|
||||||
|
{
|
||||||
|
return os << "{ first: 0x" << std::setw (8) << std::setfill ('0') << std::hex << val.offset.first
|
||||||
|
<< ", second: 0x" << std::setw (8) << std::setfill ('0') << std::hex << val.offset.second
|
||||||
|
<< " }" << std::dec;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
template <typename IdT>
|
||||||
|
std::ostream&
|
||||||
|
emory::chunk::operator<< (std::ostream &os, match<IdT> const &val)
|
||||||
|
{
|
||||||
|
return os << "{ src: { id: " << val.src.id << ", data: " << val.src.data << " }"
|
||||||
|
<< ", dst: { id: " << val.dst.id << ", data: " << val.dst.data << " }"
|
||||||
|
<< " }";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//-----------------------------------------------------------------------------
|
||||||
|
template std::ostream& emory::chunk::operator<< (std::ostream&, match<int> const&);
|
23
emory/chunk/ostream.hpp
Normal file
23
emory/chunk/ostream.hpp
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
/*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* Copyright 2019, Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "fwd.hpp"
|
||||||
|
|
||||||
|
#include <iosfwd>
|
||||||
|
|
||||||
|
|
||||||
|
namespace emory::chunk {
|
||||||
|
std::ostream& operator<< (std::ostream &, region const &);
|
||||||
|
|
||||||
|
|
||||||
|
template <typename IdT>
|
||||||
|
std::ostream&
|
||||||
|
operator<< (std::ostream &, match<IdT> const &);
|
||||||
|
}
|
9
emory/chunk/params.cpp
Normal file
9
emory/chunk/params.cpp
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
/*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* Copyright 2019, Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "params.hpp"
|
19
emory/chunk/params.hpp
Normal file
19
emory/chunk/params.hpp
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
/*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* Copyright 2019, Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
|
||||||
|
namespace emory::chunk {
    /// Tunables for the chunking pass.
    ///
    /// Every member has a default initializer so that a default-initialized
    /// `params` holds zeros rather than indeterminate values. The type is
    /// still an aggregate, so positional and designated initialization keep
    /// working.
    struct params {
        /// Number of low bits of the rolling digest that must be zero for a
        /// position to be treated as a region boundary.
        std::size_t bits {};

        /// Window size handed to the rolling hash.
        std::size_t window {};

        /// Minimum distance from the start of a region before a boundary
        /// may be accepted.
        std::ptrdiff_t minimum {};
    };
}
|
9
emory/chunk/region.cpp
Normal file
9
emory/chunk/region.cpp
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
/*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* Copyright 2019, Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "region.hpp"
|
38
emory/chunk/region.hpp
Normal file
38
emory/chunk/region.hpp
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
/*
|
||||||
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
*
|
||||||
|
* Copyright 2019, Danny Robson <danny@nerdcruft.net>
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cruft/crypto/hash/sha1.hpp>
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
namespace emory::chunk {
|
||||||
|
using static_hash = cruft::crypto::hash::SHA1;
|
||||||
|
|
||||||
|
|
||||||
|
struct region {
|
||||||
|
std::pair<off64_t, off64_t> offset;
|
||||||
|
static_hash::digest_t digest;
|
||||||
|
|
||||||
|
struct digest_equality {
|
||||||
|
bool operator() (region const &a, region const &b) const
|
||||||
|
{
|
||||||
|
return a.digest == b.digest;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct digest_ordering {
|
||||||
|
bool operator() (region const &a, region const &b) const
|
||||||
|
{
|
||||||
|
return a.digest < b.digest;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
};
|
||||||
|
}
|
@ -6,182 +6,16 @@
|
|||||||
* Copyright 2013 Danny Robson <danny@nerdcruft.net>
|
* Copyright 2013 Danny Robson <danny@nerdcruft.net>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <cruft/util/debug.hpp>
|
#include "emory/chunk/params.hpp"
|
||||||
#include <cruft/util/hash/buzhash.hpp>
|
#include "emory/chunk/map.hpp"
|
||||||
|
#include "emory/chunk/match.hpp"
|
||||||
|
#include "emory/chunk/ostream.hpp"
|
||||||
|
|
||||||
#include <cruft/util/io.hpp>
|
#include <cruft/util/io.hpp>
|
||||||
#include <cruft/util/parse/value.hpp>
|
#include <cruft/util/parse/value.hpp>
|
||||||
|
|
||||||
#include <cruft/crypto/hash/sha1.hpp>
|
#include <cstddef>
|
||||||
|
|
||||||
#include <algorithm>
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <iomanip>
|
|
||||||
|
|
||||||
|
|
||||||
namespace emory {
|
|
||||||
using static_hash = cruft::crypto::hash::SHA1;
|
|
||||||
|
|
||||||
struct params {
|
|
||||||
std::size_t bits;
|
|
||||||
std::size_t window;
|
|
||||||
std::ptrdiff_t minimum;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct chunk {
|
|
||||||
std::pair<off64_t, off64_t> offset;
|
|
||||||
static_hash::digest_t digest;
|
|
||||||
|
|
||||||
struct digest_equality {
|
|
||||||
bool operator() (chunk const &a, chunk const &b) const
|
|
||||||
{
|
|
||||||
return a.digest == b.digest;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
struct digest_ordering {
|
|
||||||
bool operator() (chunk const &a, chunk const &b) const
|
|
||||||
{
|
|
||||||
return a.digest < b.digest;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
template <typename IdT>
|
|
||||||
struct match {
|
|
||||||
struct side {
|
|
||||||
IdT id;
|
|
||||||
chunk data;
|
|
||||||
};
|
|
||||||
|
|
||||||
side src, dst;
|
|
||||||
|
|
||||||
|
|
||||||
struct src_equality {
|
|
||||||
bool operator() (match const &a, match const &b) const
|
|
||||||
{
|
|
||||||
return a.src == b.src;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
struct dst_equality {
|
|
||||||
bool operator() (match const &a, match const &b) const
|
|
||||||
{
|
|
||||||
return chunk::digest_equality {} (
|
|
||||||
a.dst.data,
|
|
||||||
b.dst.data
|
|
||||||
);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
struct src_ordering {
|
|
||||||
bool operator() (match const &a, match const &b) const
|
|
||||||
{
|
|
||||||
return chunk::digest_ordering {} (a.src.data, b.src.data);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
std::ostream& operator<< (std::ostream &os, chunk const &val)
|
|
||||||
{
|
|
||||||
return os << "{ first: 0x" << std::setw (8) << std::setfill ('0') << std::hex << val.offset.first
|
|
||||||
<< ", second: 0x" << std::setw (8) << std::setfill ('0') << std::hex << val.offset.second
|
|
||||||
<< " }" << std::dec;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
struct map {
|
|
||||||
map (cruft::view<u08 const*> data, params const&);
|
|
||||||
std::vector<chunk> elements;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
template <typename IdT>
|
|
||||||
std::ostream&
|
|
||||||
operator<< (std::ostream &os, match<IdT> const &val)
|
|
||||||
{
|
|
||||||
return os << "{ src: { id: " << val.src.id << ", data: " << val.src.data << " }"
|
|
||||||
<< ", dst: { id: " << val.dst.id << ", data: " << val.dst.data << " }"
|
|
||||||
<< " }";
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
std::vector<match<map const*>> common (map const &a, map const &b);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
emory::map::map (cruft::view<u08 const *> data, const emory::params &p)
|
|
||||||
{
|
|
||||||
using hash_type = cruft::hash::buzhash<u64>;
|
|
||||||
hash_type h (p.window, data);
|
|
||||||
auto remain = data.consume (p.window);
|
|
||||||
|
|
||||||
using digest_type = hash_type::digest_type ;
|
|
||||||
digest_type const mask = ~digest_type (0) >> (sizeof (digest_type) * 8 - p.bits);
|
|
||||||
|
|
||||||
for (u08 const *cursor = remain.begin (), *start = data.begin (); cursor != remain.end (); cursor++) {
|
|
||||||
auto const digest = h (cursor);
|
|
||||||
|
|
||||||
if (std::distance (start, cursor) < p.minimum)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (unlikely (digest & mask))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
cruft::view<u08 const*> const region { start, cursor };
|
|
||||||
start = cursor + 1;
|
|
||||||
|
|
||||||
elements.push_back ({
|
|
||||||
.offset = {
|
|
||||||
std::pair<std::size_t,std::size_t> {
|
|
||||||
std::distance (data.begin (), region.begin ()),
|
|
||||||
std::distance (data.begin (), region.end ())
|
|
||||||
},
|
|
||||||
},
|
|
||||||
.digest = static_hash {} (region),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
std::vector<
|
|
||||||
emory::match<emory::map const*>
|
|
||||||
>
|
|
||||||
emory::common (emory::map const &src, emory::map const &dst)
|
|
||||||
{
|
|
||||||
CHECK (std::is_sorted (src.elements.begin (), src.elements.end (), emory::chunk::digest_ordering {}));
|
|
||||||
CHECK (std::is_sorted (dst.elements.begin (), dst.elements.end (), emory::chunk::digest_ordering {}));
|
|
||||||
|
|
||||||
std::vector<emory::match<emory::map const*>> res;
|
|
||||||
|
|
||||||
for (auto src_cursor = src.elements.begin (), dst_cursor = dst.elements.begin ();
|
|
||||||
src_cursor != src.elements.end () && dst_cursor != dst.elements.end ();
|
|
||||||
/* nothing */)
|
|
||||||
{
|
|
||||||
if (src_cursor->digest < dst_cursor->digest) {
|
|
||||||
++src_cursor;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dst_cursor->digest < src_cursor->digest) {
|
|
||||||
++dst_cursor;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
res.push_back ({
|
|
||||||
.src = { .id = &src, .data = *src_cursor },
|
|
||||||
.dst = { .id = &dst, .data = *dst_cursor },
|
|
||||||
});
|
|
||||||
++src_cursor;
|
|
||||||
++dst_cursor;
|
|
||||||
}
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
@ -203,24 +37,24 @@ int main (int argc, char const **argv)
|
|||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
emory::params const p {
|
emory::chunk::params const p {
|
||||||
.bits = cruft::parse::from_string<std::size_t> (argv[ARG_BITS ]),
|
.bits = cruft::parse::from_string<std::size_t> (argv[ARG_BITS ]),
|
||||||
.window = cruft::parse::from_string<std::size_t> (argv[ARG_WINDOW]),
|
.window = cruft::parse::from_string<std::size_t> (argv[ARG_WINDOW]),
|
||||||
.minimum = cruft::parse::from_string<std::ptrdiff_t> (argv[ARGS_MINIMUM]),
|
.minimum = cruft::parse::from_string<std::ptrdiff_t> (argv[ARGS_MINIMUM]),
|
||||||
};
|
};
|
||||||
|
|
||||||
std::clog << "Hashing target\n";
|
std::clog << "Hashing target\n";
|
||||||
emory::map target (cruft::mapped_file (argv[ARGS_TARGET]), p);
|
emory::chunk::map target (cruft::mapped_file (argv[ARGS_TARGET]), p);
|
||||||
std::sort (target.elements.begin (), target.elements.end (), emory::chunk::digest_ordering {});
|
std::sort (target.elements.begin (), target.elements.end (), emory::chunk::region::digest_ordering {});
|
||||||
std::cout << "Found " << target.elements.size () << " chunks\n";
|
std::cout << "Found " << target.elements.size () << " chunks\n";
|
||||||
|
|
||||||
std::vector<emory::match<int>> found;
|
std::vector<emory::chunk::match<int>> found;
|
||||||
|
|
||||||
for (int i = ARGS_SOURCE; i != argc; ++i) {
|
for (int i = ARGS_SOURCE; i != argc; ++i) {
|
||||||
auto const path = argv[i];
|
auto const path = argv[i];
|
||||||
std::clog << "Hashing source: " << path << '\n';
|
std::clog << "Hashing source: " << path << '\n';
|
||||||
emory::map source (cruft::mapped_file (path), p);
|
emory::chunk::map source (cruft::mapped_file (path), p);
|
||||||
std::sort (source.elements.begin (), source.elements.end (), emory::chunk::digest_ordering {});
|
std::sort (source.elements.begin (), source.elements.end (), emory::chunk::region::digest_ordering {});
|
||||||
|
|
||||||
std::clog << "Finding common\n";
|
std::clog << "Finding common\n";
|
||||||
auto const &source_matches = common (target, source);
|
auto const &source_matches = common (target, source);
|
||||||
@ -230,7 +64,7 @@ int main (int argc, char const **argv)
|
|||||||
std::begin (source_matches),
|
std::begin (source_matches),
|
||||||
std::end (source_matches),
|
std::end (source_matches),
|
||||||
std::back_inserter (found),
|
std::back_inserter (found),
|
||||||
[&] (auto const &j) -> emory::match<int>
|
[&] (auto const &j) -> emory::chunk::match<int>
|
||||||
{
|
{
|
||||||
return {
|
return {
|
||||||
.src = { i, j.src.data },
|
.src = { i, j.src.data },
|
||||||
@ -241,14 +75,14 @@ int main (int argc, char const **argv)
|
|||||||
std::sort (
|
std::sort (
|
||||||
std::begin (found),
|
std::begin (found),
|
||||||
std::end (found),
|
std::end (found),
|
||||||
emory::match<int>::src_ordering {}
|
emory::chunk::match<int>::src_ordering {}
|
||||||
);
|
);
|
||||||
|
|
||||||
found.erase (
|
found.erase (
|
||||||
std::unique (
|
std::unique (
|
||||||
std::begin (found),
|
std::begin (found),
|
||||||
std::end (found),
|
std::end (found),
|
||||||
emory::match<int>::dst_equality {}
|
emory::chunk::match<int>::dst_equality {}
|
||||||
),
|
),
|
||||||
found.end ()
|
found.end ()
|
||||||
);
|
);
|
||||||
|
Loading…
Reference in New Issue
Block a user