12 #include <seqan3/alphabet/nucleotide/dna4.hpp>
13 #include <seqan3/search/views/kmer_hash.hpp>
26 uint64_t window_size{};
28 seqan3::shape shape{};
34 std::vector<uint64_t> forward_hashes{};
53 seqan3::shape
const shape_,
54 uint64_t
const seed_ = 0x8F3F73B5CF1C9ADE) :
55 window_size{window_size_.v},
57 shape_size{shape.size()},
58 seed{adjust_seed(shape.count(), seed_)}
60 assert(window_size >= shape_size);
// Resizes the minimiser: re-initialises window_size, shape_size and seed from the given arguments.
//   window_size_ : strong type whose member `v` holds the new window size.
//   shape_       : the new shape.
//   seed_        : seed value; it is passed through adjust_seed() together with shape.count().
// NOTE(review): listing line 71 is elided in this view; shape_size and seed are derived from the
// member `shape`, so presumably the elided line stores shape_ into it — confirm against the file.
68 void resize(
window const window_size_, seqan3::shape
const shape_, uint64_t
const seed_ = 0x8F3F73B5CF1C9ADE)
70 window_size = window_size_.
v;
72 shape_size = shape.size();
73 seed = adjust_seed(shape.count(), seed_);
// The window must be at least as long as the shape.
74 assert(window_size >= shape_size);
// Computes the minimisers of `text`: hashes every k-mer of the text, then slides a window of
// `kmers_per_window` consecutive k-mer hashes over them while tracking the window minimum.
// NOTE(review): this listing omits lines of the original file (gaps in the embedded line numbers),
// e.g. the body of `apply_xor` and the statements that record each minimiser; they are not shown here.
77 void compute(std::vector<seqan3::dna4> const & text)
// Fails if any of the three members is still zero.
79 assert(window_size && shape_size && seed);
81 size_t const text_length = text.size();
// The text must hold at least one k-mer and one full window.
82 assert(shape_size <= text_length);
83 assert(window_size <= text_length);
// Number of windows, i.e. one per possible window start in the text.
85 uint64_t const max_number_of_minimiser = text_length - window_size + 1u;
// Number of k-mers that fit into a single window.
86 uint64_t const kmers_per_window = window_size - shape_size + 1u;
92 auto apply_xor = [this](uint64_t const value)
// Hash all k-mers of the text and pass every hash through apply_xor.
96 auto kmer_view = text | seqan3::views::kmer_hash(shape) | std::views::transform(apply_xor);
97 forward_hashes.assign(kmer_view.begin(), kmer_view.end());
// (hash, position) pairs of the k-mers currently inside the window.
100 std::deque<std::pair<uint64_t, uint64_t>> window_hashes;
// First window: k-mers 0 .. kmers_per_window - 1.
103 for (uint64_t i = 0; i < kmers_per_window; ++i)
104 window_hashes.emplace_back(forward_hashes[i], i);
// Pairs compare by hash first, so this is the entry with the smallest hash in the first window.
107 auto min = std::min_element(std::begin(window_hashes), std::end(window_hashes));
// Every further window i drops its oldest k-mer and gains k-mer i + kmers_per_window - 1, so the
// window index has to start at 1; starting at kmers_per_window skipped k-mers and windows.
112 for (uint64_t i = 1; i < max_number_of_minimiser; ++i)
115 uint64_t const new_hash{forward_hashes[i + kmers_per_window - 1]};
// NOTE(review): the first window stores the k-mer index as the pair's second element, whereas the
// window index `i` is stored here — confirm which position is intended.
// NOTE(review): std::deque::emplace_back invalidates iterators, so `min` is formally invalid after
// this call — consider tracking the minimum by value or index instead.
116 window_hashes.emplace_back(new_hash, i);
// Compare against the hash of the current minimum (`first`); `second` is a position, not a hash.
118 if (new_hash < min->first)
120 min = std::prev(std::end(window_hashes));
// The current minimum is the element about to leave the window: search the remaining elements,
// excluding the new back element, which the branch above has already ruled out.
123 else if (min == std::begin(window_hashes))
126 min = std::min_element(++std::begin(window_hashes), std::prev(std::end(window_hashes)));
// Drop the k-mer that is no longer part of the window.
130 window_hashes.pop_front();
Definition: forward_strand_minimiser.hpp:19
Definition: forward_strand_minimiser.hpp:23
forward_strand_minimiser(forward_strand_minimiser &&)=default
Defaulted.
forward_strand_minimiser()=default
Defaulted.
forward_strand_minimiser(window const window_size_, seqan3::shape const shape_, uint64_t const seed_=0x8F3F73B5CF1C9ADE)
Constructs a minimiser from a given window size, shape (k-mer) and seed.
Definition: forward_strand_minimiser.hpp:52
~forward_strand_minimiser()=default
Defaulted.
std::vector< uint64_t > minimiser_begin
Stores the begin positions of the minimisers.
Definition: forward_strand_minimiser.hpp:38
forward_strand_minimiser(forward_strand_minimiser const &)=default
Defaulted.
forward_strand_minimiser & operator=(forward_strand_minimiser &&)=default
Defaulted.
forward_strand_minimiser & operator=(forward_strand_minimiser const &)=default
Defaulted.
void resize(window const window_size_, seqan3::shape const shape_, uint64_t const seed_=0x8F3F73B5CF1C9ADE)
Resize the minimiser.
Definition: forward_strand_minimiser.hpp:68
void compute(std::vector< seqan3::dna4 > const &text)
Definition: forward_strand_minimiser.hpp:77
Strong type for passing the window size.
Definition: strong_types.hpp:17
uint32_t v
Definition: strong_types.hpp:18