fastq_to_fasta
A template for creation of SeqAn3 apps, with a FASTQ to FASTA example app.
forward_strand_minimiser.hpp
Go to the documentation of this file.
1 // --------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/raptor/blob/main/LICENSE.md
6 // --------------------------------------------------------------------------------------------------
7 
8 #pragma once
9 
10 #include <deque>
11 
12 #include <seqan3/alphabet/nucleotide/dna4.hpp>
13 #include <seqan3/search/views/kmer_hash.hpp>
14 
15 #include <raptor/adjust_seed.hpp>
16 #include <raptor/strong_types.hpp>
17 
19 {
20 
21 // Minimiser without looking at reverse complement
23 {
24 private:
26  uint64_t window_size{};
28  seqan3::shape shape{};
30  uint8_t shape_size{};
32  uint64_t seed{};
34  std::vector<uint64_t> forward_hashes{};
35 
36 public:
38  std::vector<uint64_t> minimiser_begin;
39 
46 
52  forward_strand_minimiser(window const window_size_,
53  seqan3::shape const shape_,
54  uint64_t const seed_ = 0x8F3F73B5CF1C9ADE) :
55  window_size{window_size_.v},
56  shape{shape_},
57  shape_size{shape.size()},
58  seed{adjust_seed(shape.count(), seed_)}
59  {
60  assert(window_size >= shape_size);
61  }
62 
68  void resize(window const window_size_, seqan3::shape const shape_, uint64_t const seed_ = 0x8F3F73B5CF1C9ADE)
69  {
70  window_size = window_size_.v;
71  shape = shape_;
72  shape_size = shape.size();
73  seed = adjust_seed(shape.count(), seed_);
74  assert(window_size >= shape_size);
75  }
76 
77  void compute(std::vector<seqan3::dna4> const & text)
78  {
79  assert(window_size && shape_size && seed); // Forgot to initialise/resize?
80 
81  size_t const text_length = text.size();
82  assert(shape_size <= text_length);
83  assert(window_size <= text_length);
84 
85  uint64_t const max_number_of_minimiser = text_length - window_size + 1u;
86  uint64_t const kmers_per_window = window_size - shape_size + 1u;
87 
88  minimiser_begin.clear();
89  minimiser_begin.reserve(max_number_of_minimiser);
90 
91  // Compute all k-mer hashes.
92  auto apply_xor = [this](uint64_t const value)
93  {
94  return value ^ seed;
95  };
96  auto kmer_view = text | seqan3::views::kmer_hash(shape) | std::views::transform(apply_xor);
97  forward_hashes.assign(kmer_view.begin(), kmer_view.end());
98 
99  // Stores hash and begin for all k-mers in the window
100  std::deque<std::pair<uint64_t, uint64_t>> window_hashes;
101 
102  // Initialisation. We need to compute all hashes for the first window.
103  for (uint64_t i = 0; i < kmers_per_window; ++i)
104  window_hashes.emplace_back(forward_hashes[i], i);
105 
106  // The minimum hash is the minimiser. Store the begin position.
107  auto min = std::min_element(std::begin(window_hashes), std::end(window_hashes));
108  minimiser_begin.push_back(min->second);
109 
110  // For the following windows, we remove the first window k-mer (is now not in window) and add the new k-mer
111  // that results from the window shifting.
112  for (uint64_t i = kmers_per_window; i < max_number_of_minimiser; ++i)
113  {
114  // Already store the new hash without removing the first one.
115  uint64_t const new_hash{forward_hashes[i + kmers_per_window - 1]}; // Already did kmers_per_window - 1 many
116  window_hashes.emplace_back(new_hash, i);
117 
118  if (new_hash < min->second) // New hash is the minimum.
119  {
120  min = std::prev(std::end(window_hashes));
121  minimiser_begin.push_back(min->second);
122  }
123  else if (min == std::begin(window_hashes)) // Minimum is the yet-to-be-removed begin of the window.
124  {
125  // The first hash will be removed, the last one is caught by the previous if.
126  min = std::min_element(++std::begin(window_hashes), std::prev(std::end(window_hashes)));
127  minimiser_begin.push_back(min->second);
128  }
129 
130  window_hashes.pop_front(); // Remove the first k-mer.
131  }
132  return;
133  }
134 };
135 
136 } // namespace raptor::threshold
Definition: forward_strand_minimiser.hpp:19
Definition: forward_strand_minimiser.hpp:23
forward_strand_minimiser(forward_strand_minimiser &&)=default
Defaulted.
forward_strand_minimiser(window const window_size_, seqan3::shape const shape_, uint64_t const seed_=0x8F3F73B5CF1C9ADE)
Constructs a minimiser from given k-mer, window size and a seed.
Definition: forward_strand_minimiser.hpp:52
std::vector< uint64_t > minimiser_begin
Stores the begin positions of the minimisers.
Definition: forward_strand_minimiser.hpp:38
forward_strand_minimiser(forward_strand_minimiser const &)=default
Defaulted.
forward_strand_minimiser & operator=(forward_strand_minimiser &&)=default
Defaulted.
forward_strand_minimiser & operator=(forward_strand_minimiser const &)=default
Defaulted.
void resize(window const window_size_, seqan3::shape const shape_, uint64_t const seed_=0x8F3F73B5CF1C9ADE)
Resize the minimiser.
Definition: forward_strand_minimiser.hpp:68
void compute(std::vector< seqan3::dna4 > const &text)
Definition: forward_strand_minimiser.hpp:77
Strong type for passing the window size.
Definition: strong_types.hpp:17
uint32_t v
Definition: strong_types.hpp:18