Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
leandro authored and rmcolq committed Jul 3, 2019
1 parent 84aaaac commit ffc8740
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 14 deletions.
16 changes: 12 additions & 4 deletions include/kmergraphwithcoverage.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class KmerGraphWithCoverage {
private:
//each coverage is represented as a std::vector, each position of the vector representing the coverage of a sample
//each coverage of a sample is represented by a std::pair, for the forward and reverse reads coverage of that sample
std::vector<std::vector<std::pair<uint32_t, uint32_t>>> nodeIndex2SampleCoverage;
std::vector<std::vector<std::pair<uint16_t, uint16_t>>> nodeIndex2SampleCoverage;
uint32_t exp_depth_covg;
float binomial_parameter_p;
float negative_binomial_parameter_p;
Expand All @@ -48,22 +48,30 @@ class KmerGraphWithCoverage {
virtual ~KmerGraphWithCoverage() = default;

//getter
//TODO: we should return a uint16_t instead of a uint32_t, but I did not want to change the interface
//TODO: converting uint16_t to uint32_t is safe anyway
//TODO: some parts of the code accumulates under the type of this variable, so it might be dangerous to change to uint16_t wihtout careful analysis
//TODO: leaving to uint32_t to be safe
uint32_t get_covg(uint32_t node_id, bool strand, uint32_t sample_id) const;

uint32_t get_num_reads() const { return num_reads; }
uint32_t get_total_number_samples() const {return total_number_samples; }

//setters
void increment_covg(uint32_t node_id, bool strand, uint32_t sample_id);
void set_covg(uint32_t node_id, uint32_t value, bool strand, uint32_t sample_id);
void set_covg(uint32_t node_id, uint16_t value, bool strand, uint32_t sample_id);
void set_exp_depth_covg(const uint32_t);
void set_binomial_parameter_p(const float);
void set_negative_binomial_parameters(const float &, const float &);
void set_thresh (int thresh) { this->thresh = thresh; }
void set_num_reads(uint32_t num_reads) { this->num_reads = num_reads; }

void zeroCoverages() {
for (auto &sampleCoverage: nodeIndex2SampleCoverage)
sampleCoverage = std::vector<std::pair<uint32_t, uint32_t>>(total_number_samples);
for (auto &sampleCoverage: nodeIndex2SampleCoverage) {
sampleCoverage = std::vector<std::pair<uint16_t, uint16_t>>(total_number_samples);
sampleCoverage.shrink_to_fit(); //tries to make this information as compact as possible (TODO: use sdsl?)
}

}

float nbin_prob(uint32_t, const uint32_t &sample_id);
Expand Down
24 changes: 16 additions & 8 deletions src/kmergraphwithcoverage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include <cstdio> /* NULL */
#include <cstdlib> /* srand, rand */
#include <cmath>
#include <cstdint>

#include <boost/math/distributions/negative_binomial.hpp>
#include <boost/log/trivial.hpp>
Expand Down Expand Up @@ -36,10 +37,15 @@ void KmerGraphWithCoverage::set_binomial_parameter_p(const float e_rate) {
void KmerGraphWithCoverage::increment_covg(uint32_t node_id, bool strand, uint32_t sample_id) {
assert(this->nodeIndex2SampleCoverage[node_id].size() > sample_id);

//get a pointer to the value we want to increment
uint16_t* coverage_ptr = nullptr;
if (strand)
this->nodeIndex2SampleCoverage[node_id][sample_id].first++;
coverage_ptr = &(this->nodeIndex2SampleCoverage[node_id][sample_id].first);
else
this->nodeIndex2SampleCoverage[node_id][sample_id].second++;
coverage_ptr = &(this->nodeIndex2SampleCoverage[node_id][sample_id].second);

if ((*coverage_ptr) < UINT16_MAX) //checks if it is safe to increase the coverage (no overflow)
++(*coverage_ptr);
}

uint32_t KmerGraphWithCoverage::get_covg(uint32_t node_id, bool strand, uint32_t sample_id) const {
Expand All @@ -48,12 +54,12 @@ uint32_t KmerGraphWithCoverage::get_covg(uint32_t node_id, bool strand, uint32_t
return 0;

if (strand)
return this->nodeIndex2SampleCoverage[node_id][sample_id].first;
return (uint32_t)(this->nodeIndex2SampleCoverage[node_id][sample_id].first);
else
return this->nodeIndex2SampleCoverage[node_id][sample_id].second;
return (uint32_t)(this->nodeIndex2SampleCoverage[node_id][sample_id].second);
}

void KmerGraphWithCoverage::set_covg(uint32_t node_id, uint32_t value, bool strand, uint32_t sample_id) {
void KmerGraphWithCoverage::set_covg(uint32_t node_id, uint16_t value, bool strand, uint32_t sample_id) {
assert(this->nodeIndex2SampleCoverage[node_id].size() > sample_id);
if (strand)
this->nodeIndex2SampleCoverage[node_id][sample_id].first = value;
Expand Down Expand Up @@ -416,6 +422,7 @@ void KmerGraphWithCoverage::save(const std::string &filepath, const std::shared_
}

//TODO: THIS SHOULD BE RECODED, WE ARE DUPLICATING CODE HERE (SEE KmerGraph::load())!!!
//TODO: remove this method?
void KmerGraphWithCoverage::load(const std::string &filepath) {
//TODO: this might be dangerous, recode this?
auto kmer_prg = const_cast<KmerGraph*>(this->kmer_prg);
Expand All @@ -425,7 +432,8 @@ void KmerGraphWithCoverage::load(const std::string &filepath) {
std::string line;
std::vector<std::string> split_line;
std::stringstream ss;
uint32_t id = 0, covg, from, to;
uint32_t id = 0, from, to;
uint16_t covg;
prg::Path p;
uint32_t num_nodes = 0;

Expand Down Expand Up @@ -466,9 +474,9 @@ void KmerGraphWithCoverage::load(const std::string &filepath) {
if (kmer_prg->k == 0 and p.length() > 0) {
kmer_prg->k = p.length();
}
covg = stoi(split(split_line[3], "FC:i:")[0]);
covg = (uint16_t)(stoul(split(split_line[3], "FC:i:")[0]));
set_covg(n->id, covg, 0, sample_id);
covg = stoi(split(split_line[4], "RC:i:")[0]);
covg = (uint16_t)(stoul(split(split_line[4], "RC:i:")[0]));
set_covg(n->id, covg, 1, sample_id);
if (split_line.size() >= 6) {
n->num_AT = std::stoi(split_line[5]);
Expand Down
4 changes: 2 additions & 2 deletions src/pangenome/pangraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,8 @@ void pangenome::Graph::copy_coverages_to_kmergraphs(const Graph &ref_pangraph, c
for (auto & kmergraph_node_ptr : pangraph_node.kmer_prg_with_coverage.kmer_prg->nodes){
const auto &knode_id = kmergraph_node_ptr->id;
assert(knode_id < ref_node.kmer_prg_with_coverage.kmer_prg->nodes.size());
pangraph_node.kmer_prg_with_coverage.set_covg(knode_id, ref_node.kmer_prg_with_coverage.get_covg(knode_id, 0, ref_sample_id), 0, sample_id);
pangraph_node.kmer_prg_with_coverage.set_covg(knode_id, ref_node.kmer_prg_with_coverage.get_covg(knode_id, 1, ref_sample_id), 1, sample_id);
pangraph_node.kmer_prg_with_coverage.set_covg(knode_id, (uint16_t)(ref_node.kmer_prg_with_coverage.get_covg(knode_id, 0, ref_sample_id)), 0, sample_id);
pangraph_node.kmer_prg_with_coverage.set_covg(knode_id, (uint16_t)(ref_node.kmer_prg_with_coverage.get_covg(knode_id, 1, ref_sample_id)), 1, sample_id);
}
}
}
Expand Down

0 comments on commit ffc8740

Please sign in to comment.