diff --git a/emory/chunk/find.hpp b/emory/chunk/find.hpp index 7f5cdb1..cfa3640 100644 --- a/emory/chunk/find.hpp +++ b/emory/chunk/find.hpp @@ -9,6 +9,7 @@ #include "params.hpp" #include +#include /////////////////////////////////////////////////////////////////////////////// @@ -37,14 +38,18 @@ namespace emory::chunk { hash_state = cruft::rotatel (hash_state, 1) ^ *cursor++; for ( ; cursor < src.end () - p.window; ++cursor) { - if (likely (hash_state & mask)) { - hash_state = cruft::rotatel (hash_state, 1) - ^ cruft::rotatel (u64 (*(cursor - p.window)), p.window) - ^ *cursor; - continue; + if (cursor - start < p.maximum) { + if (likely (hash_state & mask)) { + hash_state = cruft::rotatel (hash_state, 1) + ^ cruft::rotatel (u64 (*(cursor - p.window)), p.window) + ^ *cursor; + continue; + } } cruft::view const region { start, cursor }; + CHECK_GE (cursor - start, p.minimum); + CHECK_LE (cursor - start, p.maximum); *dst = { .offset = { diff --git a/emory/chunk/ostream.cpp b/emory/chunk/ostream.cpp index eed6201..5dc8bc6 100644 --- a/emory/chunk/ostream.cpp +++ b/emory/chunk/ostream.cpp @@ -23,6 +23,7 @@ emory::chunk::operator<< (std::ostream &os, params const &val) return os << "{ bits: " << val.bits << ", window: " << val.window << ", minimum: " << val.minimum + << ", maximum: " << val.maximum << " }"; } diff --git a/emory/chunk/params.hpp b/emory/chunk/params.hpp index 41d67bf..ec71e14 100644 --- a/emory/chunk/params.hpp +++ b/emory/chunk/params.hpp @@ -18,11 +18,14 @@ namespace emory::chunk { std::size_t window; /// The minimum number of bytes for a matching region. 
std::ptrdiff_t minimum; + /// The maximum number of bytes for a matching region. + std::ptrdiff_t maximum; }; constexpr params DEFAULT_PARAMS { .bits = 12, .window = 8, .minimum = 4096, + .maximum = 4 * 1024 * 1024, }; } diff --git a/tools/analyse.cpp b/tools/analyse.cpp index 9e06f9b..f8800d6 100644 --- a/tools/analyse.cpp +++ b/tools/analyse.cpp @@ -154,6 +154,7 @@ enum { ARG_BITS, ARG_WINDOW, ARG_MINIMUM, + ARG_MAXIMUM, NUM_ARGS, NUM_ARGS_REQUIRED = 3, @@ -164,10 +165,11 @@ enum { int main (int argc, char const **argv) { if (argc < NUM_ARGS_REQUIRED) { - std::cerr << "usage: " << argv[ARG_SELF] << " [bits] [window] [minimum]\n" + std::cerr << "usage: " << argv[ARG_SELF] << " [bits] [window] [minimum] [maximum]\n" << "default bits = " << emory::chunk::DEFAULT_PARAMS.bits << '\n' << "default window = " << emory::chunk::DEFAULT_PARAMS.window << '\n' - << "default minimum = " << emory::chunk::DEFAULT_PARAMS.minimum << '\n'; + << "default minimum = " << emory::chunk::DEFAULT_PARAMS.minimum << '\n' + << "default maximum = " << emory::chunk::DEFAULT_PARAMS.maximum << '\n'; return EXIT_FAILURE; } @@ -178,6 +180,8 @@ int main (int argc, char const **argv) p.window = cruft::parse::from_string (argv[ARG_WINDOW]); if (argc > ARG_BITS + 1) p.minimum = cruft::parse::from_string (argv[ARG_MINIMUM]); + if (argc > ARG_MAXIMUM) + p.maximum = cruft::parse::from_string (argv[ARG_MAXIMUM]); std::cerr << p << '\n'; diff --git a/tools/compare.cpp b/tools/compare.cpp index 80933d2..7744de1 100644 --- a/tools/compare.cpp +++ b/tools/compare.cpp @@ -24,6 +24,7 @@ enum { ARG_BITS, ARG_WINDOW, ARGS_MINIMUM, + ARGS_MAXIMUM, ARGS_TARGET, ARGS_SOURCE, @@ -34,14 +35,15 @@ enum { int main (int argc, char const **argv) { if (argc < NUM_ARGS) { - std::cerr << "usage: " << argv[ARG_SELF] << " [...]\n"; + std::cerr << "usage: " << argv[ARG_SELF] << " [...]\n"; return EXIT_FAILURE; } emory::chunk::params const p { - .bits = cruft::parse::from_string (argv[ARG_BITS ]), - .window = 
cruft::parse::from_string (argv[ARG_WINDOW]), + .bits = cruft::parse::from_string (argv[ARG_BITS ]), + .window = cruft::parse::from_string (argv[ARG_WINDOW]), .minimum = cruft::parse::from_string (argv[ARGS_MINIMUM]), + .maximum = cruft::parse::from_string (argv[ARGS_MAXIMUM]), }; std::clog << "Hashing target\n";