chunk/param: add a maximum chunk size param
This commit is contained in:
parent
3ad55453f7
commit
c172ee2c40
@ -9,6 +9,7 @@
|
||||
#include "params.hpp"
|
||||
|
||||
#include <cruft/util/hash/buzhash.hpp>
|
||||
#include <cruft/util/debug/assert.hpp>
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@ -37,14 +38,18 @@ namespace emory::chunk {
|
||||
hash_state = cruft::rotatel (hash_state, 1) ^ *cursor++;
|
||||
|
||||
for ( ; cursor < src.end () - p.window; ++cursor) {
|
||||
if (likely (hash_state & mask)) {
|
||||
hash_state = cruft::rotatel (hash_state, 1)
|
||||
^ cruft::rotatel (u64 (*(cursor - p.window)), p.window)
|
||||
^ *cursor;
|
||||
continue;
|
||||
if (cursor < start + p.maximum) {
|
||||
if (likely (hash_state & mask)) {
|
||||
hash_state = cruft::rotatel (hash_state, 1)
|
||||
^ cruft::rotatel (u64 (*(cursor - p.window)), p.window)
|
||||
^ *cursor;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
cruft::view<u08 const*> const region { start, cursor };
|
||||
CHECK_GE (cursor - start, p.minimum);
|
||||
CHECK_LE (cursor - start, p.maximum);
|
||||
|
||||
*dst = {
|
||||
.offset = {
|
||||
|
@ -23,6 +23,7 @@ emory::chunk::operator<< (std::ostream &os, params const &val)
|
||||
return os << "{ bits: " << val.bits
|
||||
<< ", window: " << val.window
|
||||
<< ", minimum: " << val.minimum
|
||||
<< ", maximum: " << val.maximum
|
||||
<< " }";
|
||||
}
|
||||
|
||||
|
@ -18,11 +18,14 @@ namespace emory::chunk {
|
||||
std::size_t window;
|
||||
/// The minimum number of bytes for a matching region.
|
||||
std::ptrdiff_t minimum;
|
||||
/// The maximum number of bytes for a matching region.
|
||||
std::ptrdiff_t maximum;
|
||||
};
|
||||
|
||||
/// Default chunking parameters: 12 bits, an 8 byte window, and matching
/// regions bounded to between 4 KiB and 4 MiB.
constexpr params DEFAULT_PARAMS {
|
||||
.bits = 12,
|
||||
.window = 8,
|
||||
// 4 KiB
.minimum = 4096,
|
||||
// 4 MiB
.maximum = 4 * 1024 * 1024,
|
||||
};
|
||||
}
|
||||
|
@ -154,6 +154,7 @@ enum {
|
||||
ARG_BITS,
|
||||
ARG_WINDOW,
|
||||
ARG_MINIMUM,
|
||||
ARG_MAXIMUM,
|
||||
|
||||
NUM_ARGS,
|
||||
NUM_ARGS_REQUIRED = 3,
|
||||
@ -164,10 +165,11 @@ enum {
|
||||
int main (int argc, char const **argv)
|
||||
{
|
||||
if (argc < NUM_ARGS_REQUIRED) {
|
||||
std::cerr << "usage: " << argv[ARG_SELF] << " <input> <output> [bits] [window] [minimum]\n"
|
||||
std::cerr << "usage: " << argv[ARG_SELF] << " <input> <output> [bits] [window] [minimum] [maximum]\n"
|
||||
<< "default bits = " << emory::chunk::DEFAULT_PARAMS.bits << '\n'
|
||||
<< "default window = " << emory::chunk::DEFAULT_PARAMS.window << '\n'
|
||||
<< "default minimum = " << emory::chunk::DEFAULT_PARAMS.minimum << '\n';
|
||||
<< "default minimum = " << emory::chunk::DEFAULT_PARAMS.minimum << '\n'
|
||||
<< "default maximum = " << emory::chunk::DEFAULT_PARAMS.maximum << '\n';
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
@ -178,6 +180,8 @@ int main (int argc, char const **argv)
|
||||
p.window = cruft::parse::from_string<std::size_t> (argv[ARG_WINDOW]);
|
||||
if (argc > ARG_BITS + 1)
|
||||
p.minimum = cruft::parse::from_string<std::size_t> (argv[ARG_MINIMUM]);
|
||||
if (argc > ARG_MAXIMUM + 1)
|
||||
p.maximum = cruft::parse::from_string<std::size_t> (argv[ARG_MAXIMUM]);
|
||||
|
||||
std::cerr << p << '\n';
|
||||
|
||||
|
@ -24,6 +24,7 @@ enum {
|
||||
ARG_BITS,
|
||||
ARG_WINDOW,
|
||||
ARGS_MINIMUM,
|
||||
ARGS_MAXIMUM,
|
||||
ARGS_TARGET,
|
||||
ARGS_SOURCE,
|
||||
|
||||
@ -34,14 +35,15 @@ enum {
|
||||
int main (int argc, char const **argv)
|
||||
{
|
||||
if (argc < NUM_ARGS) {
|
||||
std::cerr << "usage: " << argv[ARG_SELF] << " <bits> <window> <minimum> <target> <source> [...]\n";
|
||||
std::cerr << "usage: " << argv[ARG_SELF] << " <bits> <window> <minimum> <maximum> <target> <source> [...]\n";
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
emory::chunk::params const p {
|
||||
.bits = cruft::parse::from_string<std::size_t> (argv[ARG_BITS ]),
|
||||
.window = cruft::parse::from_string<std::size_t> (argv[ARG_WINDOW]),
|
||||
.bits = cruft::parse::from_string<std::size_t> (argv[ARG_BITS ]),
|
||||
.window = cruft::parse::from_string<std::size_t> (argv[ARG_WINDOW]),
|
||||
.minimum = cruft::parse::from_string<std::ptrdiff_t> (argv[ARGS_MINIMUM]),
|
||||
.maximum = cruft::parse::from_string<std::ptrdiff_t> (argv[ARGS_MAXIMUM]),
|
||||
};
|
||||
|
||||
std::clog << "Hashing target\n";
|
||||
|
Loading…
Reference in New Issue
Block a user