chunk/param: add a maximum chunk size param
This commit is contained in:
parent
3ad55453f7
commit
c172ee2c40
@ -9,6 +9,7 @@
|
|||||||
#include "params.hpp"
|
#include "params.hpp"
|
||||||
|
|
||||||
#include <cruft/util/hash/buzhash.hpp>
|
#include <cruft/util/hash/buzhash.hpp>
|
||||||
|
#include <cruft/util/debug/assert.hpp>
|
||||||
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
@ -37,14 +38,18 @@ namespace emory::chunk {
|
|||||||
hash_state = cruft::rotatel (hash_state, 1) ^ *cursor++;
|
hash_state = cruft::rotatel (hash_state, 1) ^ *cursor++;
|
||||||
|
|
||||||
for ( ; cursor < src.end () - p.window; ++cursor) {
|
for ( ; cursor < src.end () - p.window; ++cursor) {
|
||||||
if (likely (hash_state & mask)) {
|
if (cursor < start + p.maximum) {
|
||||||
hash_state = cruft::rotatel (hash_state, 1)
|
if (likely (hash_state & mask)) {
|
||||||
^ cruft::rotatel (u64 (*(cursor - p.window)), p.window)
|
hash_state = cruft::rotatel (hash_state, 1)
|
||||||
^ *cursor;
|
^ cruft::rotatel (u64 (*(cursor - p.window)), p.window)
|
||||||
continue;
|
^ *cursor;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cruft::view<u08 const*> const region { start, cursor };
|
cruft::view<u08 const*> const region { start, cursor };
|
||||||
|
CHECK_GE (cursor - start, p.minimum);
|
||||||
|
CHECK_LE (cursor - start, p.maximum);
|
||||||
|
|
||||||
*dst = {
|
*dst = {
|
||||||
.offset = {
|
.offset = {
|
||||||
|
@ -23,6 +23,7 @@ emory::chunk::operator<< (std::ostream &os, params const &val)
|
|||||||
return os << "{ bits: " << val.bits
|
return os << "{ bits: " << val.bits
|
||||||
<< ", window: " << val.window
|
<< ", window: " << val.window
|
||||||
<< ", minimum: " << val.minimum
|
<< ", minimum: " << val.minimum
|
||||||
|
<< ", maximum: " << val.maximum
|
||||||
<< " }";
|
<< " }";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -18,11 +18,14 @@ namespace emory::chunk {
|
|||||||
std::size_t window;
|
std::size_t window;
|
||||||
/// The minimum number of bytes for a matching region.
|
/// The minimum number of bytes for a matching region.
|
||||||
std::ptrdiff_t minimum;
|
std::ptrdiff_t minimum;
|
||||||
|
/// The maximum number of bytes for a matching region.
|
||||||
|
std::ptrdiff_t maximum;
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr params DEFAULT_PARAMS {
|
constexpr params DEFAULT_PARAMS {
|
||||||
.bits = 12,
|
.bits = 12,
|
||||||
.window = 8,
|
.window = 8,
|
||||||
.minimum = 4096,
|
.minimum = 4096,
|
||||||
|
.maximum = 4 * 1024 * 1024,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -154,6 +154,7 @@ enum {
|
|||||||
ARG_BITS,
|
ARG_BITS,
|
||||||
ARG_WINDOW,
|
ARG_WINDOW,
|
||||||
ARG_MINIMUM,
|
ARG_MINIMUM,
|
||||||
|
ARG_MAXIMUM,
|
||||||
|
|
||||||
NUM_ARGS,
|
NUM_ARGS,
|
||||||
NUM_ARGS_REQUIRED = 3,
|
NUM_ARGS_REQUIRED = 3,
|
||||||
@ -164,10 +165,11 @@ enum {
|
|||||||
int main (int argc, char const **argv)
|
int main (int argc, char const **argv)
|
||||||
{
|
{
|
||||||
if (argc < NUM_ARGS_REQUIRED) {
|
if (argc < NUM_ARGS_REQUIRED) {
|
||||||
std::cerr << "usage: " << argv[ARG_SELF] << " <input> <output> [bits] [window] [minimum]\n"
|
std::cerr << "usage: " << argv[ARG_SELF] << " <input> <output> [bits] [window] [minimum] [maximum]\n"
|
||||||
<< "default bits = " << emory::chunk::DEFAULT_PARAMS.bits << '\n'
|
<< "default bits = " << emory::chunk::DEFAULT_PARAMS.bits << '\n'
|
||||||
<< "default window = " << emory::chunk::DEFAULT_PARAMS.window << '\n'
|
<< "default window = " << emory::chunk::DEFAULT_PARAMS.window << '\n'
|
||||||
<< "default minimum = " << emory::chunk::DEFAULT_PARAMS.minimum << '\n';
|
<< "default minimum = " << emory::chunk::DEFAULT_PARAMS.minimum << '\n'
|
||||||
|
<< "default maximum = " << emory::chunk::DEFAULT_PARAMS.maximum << '\n';
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -178,6 +180,8 @@ int main (int argc, char const **argv)
|
|||||||
p.window = cruft::parse::from_string<std::size_t> (argv[ARG_WINDOW]);
|
p.window = cruft::parse::from_string<std::size_t> (argv[ARG_WINDOW]);
|
||||||
if (argc > ARG_BITS + 1)
|
if (argc > ARG_BITS + 1)
|
||||||
p.minimum = cruft::parse::from_string<std::size_t> (argv[ARG_MINIMUM]);
|
p.minimum = cruft::parse::from_string<std::size_t> (argv[ARG_MINIMUM]);
|
||||||
|
if (argc > ARG_MAXIMUM + 1)
|
||||||
|
p.maximum = cruft::parse::from_string<std::size_t> (argv[ARG_MAXIMUM]);
|
||||||
|
|
||||||
std::cerr << p << '\n';
|
std::cerr << p << '\n';
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@ enum {
|
|||||||
ARG_BITS,
|
ARG_BITS,
|
||||||
ARG_WINDOW,
|
ARG_WINDOW,
|
||||||
ARGS_MINIMUM,
|
ARGS_MINIMUM,
|
||||||
|
ARGS_MAXIMUM,
|
||||||
ARGS_TARGET,
|
ARGS_TARGET,
|
||||||
ARGS_SOURCE,
|
ARGS_SOURCE,
|
||||||
|
|
||||||
@ -34,14 +35,15 @@ enum {
|
|||||||
int main (int argc, char const **argv)
|
int main (int argc, char const **argv)
|
||||||
{
|
{
|
||||||
if (argc < NUM_ARGS) {
|
if (argc < NUM_ARGS) {
|
||||||
std::cerr << "usage: " << argv[ARG_SELF] << " <bits> <window> <minimum> <target> <source> [...]\n";
|
std::cerr << "usage: " << argv[ARG_SELF] << " <bits> <window> <minimum> <maximum> <target> <source> [...]\n";
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
emory::chunk::params const p {
|
emory::chunk::params const p {
|
||||||
.bits = cruft::parse::from_string<std::size_t> (argv[ARG_BITS ]),
|
.bits = cruft::parse::from_string<std::size_t> (argv[ARG_BITS ]),
|
||||||
.window = cruft::parse::from_string<std::size_t> (argv[ARG_WINDOW]),
|
.window = cruft::parse::from_string<std::size_t> (argv[ARG_WINDOW]),
|
||||||
.minimum = cruft::parse::from_string<std::ptrdiff_t> (argv[ARGS_MINIMUM]),
|
.minimum = cruft::parse::from_string<std::ptrdiff_t> (argv[ARGS_MINIMUM]),
|
||||||
|
.maximum = cruft::parse::from_string<std::ptrdiff_t> (argv[ARGS_MAXIMUM]),
|
||||||
};
|
};
|
||||||
|
|
||||||
std::clog << "Hashing target\n";
|
std::clog << "Hashing target\n";
|
||||||
|
Loading…
Reference in New Issue
Block a user