chunk/param: add a maximum chunk size param

This commit is contained in:
Danny Robson 2020-12-29 08:01:40 +10:00
parent 3ad55453f7
commit c172ee2c40
5 changed files with 25 additions and 10 deletions

View File

@ -9,6 +9,7 @@
#include "params.hpp"
#include <cruft/util/hash/buzhash.hpp>
#include <cruft/util/debug/assert.hpp>
///////////////////////////////////////////////////////////////////////////////
@ -37,14 +38,18 @@ namespace emory::chunk {
hash_state = cruft::rotatel (hash_state, 1) ^ *cursor++;
for ( ; cursor < src.end () - p.window; ++cursor) {
if (likely (hash_state & mask)) {
hash_state = cruft::rotatel (hash_state, 1)
^ cruft::rotatel (u64 (*(cursor - p.window)), p.window)
^ *cursor;
continue;
if (cursor < start + p.maximum) {
if (likely (hash_state & mask)) {
hash_state = cruft::rotatel (hash_state, 1)
^ cruft::rotatel (u64 (*(cursor - p.window)), p.window)
^ *cursor;
continue;
}
}
cruft::view<u08 const*> const region { start, cursor };
CHECK_GE (cursor - start, p.minimum);
CHECK_LE (cursor - start, p.maximum);
*dst = {
.offset = {

View File

@ -23,6 +23,7 @@ emory::chunk::operator<< (std::ostream &os, params const &val)
return os << "{ bits: " << val.bits
<< ", window: " << val.window
<< ", minimum: " << val.minimum
<< ", maximum: " << val.maximum
<< " }";
}

View File

@ -18,11 +18,14 @@ namespace emory::chunk {
std::size_t window;
/// The minimum number of bytes for a matching region.
std::ptrdiff_t minimum;
/// The maximum number of bytes for a matching region.
std::ptrdiff_t maximum;
};
/// Default chunking parameters. The command-line tool prints these values
/// in its usage text as the defaults for its optional arguments.
constexpr params DEFAULT_PARAMS {
// NOTE(review): presumably the number of hash bits tested when looking for
// a chunk boundary -- confirm against the chunker implementation.
.bits = 12,
// Rolling-hash window: the chunker refers back this many bytes behind the
// cursor when updating the hash state.
.window = 8,
// Minimum number of bytes for a matching region.
.minimum = 4096,
// Maximum number of bytes for a matching region (4 MiB).
.maximum = 4 * 1024 * 1024,
};
}

View File

@ -154,6 +154,7 @@ enum {
ARG_BITS,
ARG_WINDOW,
ARG_MINIMUM,
ARG_MAXIMUM,
NUM_ARGS,
NUM_ARGS_REQUIRED = 3,
@ -164,10 +165,11 @@ enum {
int main (int argc, char const **argv)
{
if (argc < NUM_ARGS_REQUIRED) {
std::cerr << "usage: " << argv[ARG_SELF] << " <input> <output> [bits] [window] [minimum]\n"
std::cerr << "usage: " << argv[ARG_SELF] << " <input> <output> [bits] [window] [minimum] [maximum]\n"
<< "default bits = " << emory::chunk::DEFAULT_PARAMS.bits << '\n'
<< "default window = " << emory::chunk::DEFAULT_PARAMS.window << '\n'
<< "default minimum = " << emory::chunk::DEFAULT_PARAMS.minimum << '\n';
<< "default minimum = " << emory::chunk::DEFAULT_PARAMS.minimum << '\n'
<< "default maximum = " << emory::chunk::DEFAULT_PARAMS.maximum << '\n';
return EXIT_FAILURE;
}
@ -178,6 +180,8 @@ int main (int argc, char const **argv)
p.window = cruft::parse::from_string<std::size_t> (argv[ARG_WINDOW]);
if (argc > ARG_BITS + 1)
p.minimum = cruft::parse::from_string<std::size_t> (argv[ARG_MINIMUM]);
if (argc > ARG_MAXIMUM + 1)
p.maximum = cruft::parse::from_string<std::size_t> (argv[ARG_MAXIMUM]);
std::cerr << p << '\n';

View File

@ -24,6 +24,7 @@ enum {
ARG_BITS,
ARG_WINDOW,
ARGS_MINIMUM,
ARGS_MAXIMUM,
ARGS_TARGET,
ARGS_SOURCE,
@ -34,14 +35,15 @@ enum {
int main (int argc, char const **argv)
{
if (argc < NUM_ARGS) {
std::cerr << "usage: " << argv[ARG_SELF] << " <bits> <window> <minimum> <target> <source> [...]\n";
std::cerr << "usage: " << argv[ARG_SELF] << " <bits> <window> <minimum> <maximum> <target> <source> [...]\n";
return EXIT_FAILURE;
}
emory::chunk::params const p {
.bits = cruft::parse::from_string<std::size_t> (argv[ARG_BITS ]),
.window = cruft::parse::from_string<std::size_t> (argv[ARG_WINDOW]),
.bits = cruft::parse::from_string<std::size_t> (argv[ARG_BITS ]),
.window = cruft::parse::from_string<std::size_t> (argv[ARG_WINDOW]),
.minimum = cruft::parse::from_string<std::ptrdiff_t> (argv[ARGS_MINIMUM]),
.maximum = cruft::parse::from_string<std::ptrdiff_t> (argv[ARGS_MAXIMUM]),
};
std::clog << "Hashing target\n";