Skip to content

Commit

Permalink
Added more tests and rewrote parse_groups without quick_csv
Browse files Browse the repository at this point in the history
  • Loading branch information
lucaparmigiani committed Sep 24, 2024
1 parent 466cefd commit b377052
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 12 deletions.
121 changes: 121 additions & 0 deletions src/abacus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1298,4 +1298,125 @@ mod tests {
let hist = abacus_by_total.construct_hist_bps(&graph_aux);
assert_eq!(hist, test_hist, "Expected same hist");
}

/// Builds the fixture shared by the tests in this module.
///
/// Returns, in order: the `GraphAuxilliary` produced by
/// `GraphAuxilliary::from_gfa` for `test/cdbg.gfa` with `CountType::Node`,
/// the value of `Params::test_default_histgrowth()`, and the GFA path as an
/// owned `String`.
fn setup_test_data() -> (GraphAuxilliary, Params, String) {
    const GFA_PATH: &str = "test/cdbg.gfa";
    (
        GraphAuxilliary::from_gfa(GFA_PATH, CountType::Node),
        Params::test_default_histgrowth(),
        GFA_PATH.to_string(),
    )
}

/// `AbacusAuxilliary::from_params` must succeed on the shared fixture and
/// report six group entries.
#[test]
fn test_path_auxilliary_from_params_success() {
    let (graph_aux, params, _) = setup_test_data();

    let path_aux = AbacusAuxilliary::from_params(&params, &graph_aux);
    assert!(path_aux.is_ok(), "Expected successful creation of AbacusAuxilliary");

    // No `dbg!` here: on failure `assert_eq!` already prints both values, and
    // on success the test should stay silent.
    let path_aux = path_aux.unwrap();
    assert_eq!(path_aux.groups.len(), 6); // number of paths == groups
}

/// Loading groups with the flags `(false, true)` on the shared fixture must
/// succeed and yield four distinct group names.
// NOTE(review): presumably the second flag selects per-sample grouping —
// inferred from the test name and assertion message only.
#[test]
fn test_path_auxilliary_load_groups_by_sample() {
    let (graph_aux, _, _) = setup_test_data();

    let loaded = AbacusAuxilliary::load_groups("", false, true, &graph_aux);
    assert!(loaded.is_ok(), "Expected successful group loading by sample");

    // Only the group names matter; collapse them into a set to count the
    // distinct ones.
    let distinct: HashSet<_> = loaded
        .unwrap()
        .into_iter()
        .map(|(_, group_name)| group_name)
        .collect();
    assert_eq!(distinct.len(), 4, "Expected one group per sample");
}

/// Loading groups with the flags `(true, false)` on the shared fixture must
/// yield five distinct group names.
// NOTE(review): presumably the first flag selects per-haplotype grouping —
// inferred from the test name and assertion message only.
#[test]
fn test_path_auxilliary_load_groups_by_haplotype() {
    let (graph_aux, _, _) = setup_test_data();

    // Unwrap directly: a loading failure should abort the test right here.
    let distinct: HashSet<_> = AbacusAuxilliary::load_groups("", true, false, &graph_aux)
        .unwrap()
        .into_iter()
        .map(|(_, group_name)| group_name)
        .collect();
    assert_eq!(distinct.len(), 5, "Expected 5 groups based on haplotype");
}

/// A coordinate list that names group `G1` must be accepted by
/// `complement_with_group_assignments` and come back as two path segments
/// (the map below assigns two paths to `G1`).
#[test]
fn test_complement_with_group_assignments_valid() {
    // Small helper so each assignment reads as "path -> group".
    let assign = |path: &str, group: &str| (PathSegment::from_str(path), group.to_string());
    let groups = HashMap::from([
        assign("a#1#h1", "G1"),
        assign("b#1#h1", "G1"),
        assign("c#1#h1", "G2"),
    ]);

    let requested = Some(vec![PathSegment::from_str("G1")]);
    let outcome = AbacusAuxilliary::complement_with_group_assignments(requested, &groups);
    assert!(outcome.is_ok(), "Expected successful complement with group assignments");

    let expanded = outcome.unwrap();
    assert!(expanded.is_some(), "Expected Some(complemented) coordinates");

    let segment_count = expanded.unwrap().len();
    assert_eq!(segment_count, 2, "Expected 2 path segments in the complemented list");
}

/// A group identifier carrying start/stop information (`G1:1-5`) must be
/// rejected by `complement_with_group_assignments`.
#[test]
fn test_complement_with_group_assignments_invalid() {
    let assign = |path: &str, group: &str| (PathSegment::from_str(path), group.to_string());
    let groups = HashMap::from([assign("a#0", "G1"), assign("b#0", "G1")]);

    let requested = Some(vec![PathSegment::from_str("G1:1-5")]);
    let outcome = AbacusAuxilliary::complement_with_group_assignments(requested, &groups);
    assert!(outcome.is_err(), "Expected error due to invalid group identifier with start/stop information");
}

/// `build_subpath_map` must key its result per sequence and report a single
/// interval for `seq1`, whose two input segments (0-100 and 50-150) overlap.
#[test]
fn test_build_subpath_map_with_overlaps() {
    // All segments share sample and haplotype; only sequence and range vary.
    let segment = |seq: &str, start, end| {
        PathSegment::new(
            "sample".to_string(),
            "hap1".to_string(),
            seq.to_string(),
            Some(start),
            Some(end),
        )
    };
    let path_segments = vec![
        segment("seq1", 0, 100),
        segment("seq1", 50, 150),
        segment("seq2", 0, 100),
    ];

    let subpath_map = AbacusAuxilliary::build_subpath_map(&path_segments);
    assert_eq!(subpath_map.len(), 2, "Expected 2 sequences in the subpath map");

    let seq1_intervals = subpath_map.get("sample#hap1#seq1").unwrap();
    assert_eq!(seq1_intervals.len(), 1, "Expected 1 non-overlapping interval for seq1");

    let seq2_intervals = subpath_map.get("sample#hap1#seq2").unwrap();
    assert_eq!(seq2_intervals.len(), 1, "Expected 1 interval for seq2");
}

/// With two distinct paths excluded (one of them listed twice),
/// `get_path_order` must return four paths for the shared fixture.
#[test]
fn test_get_path_order_with_exclusions() {
    let (graph_aux, _, _) = setup_test_data();

    let groups = AbacusAuxilliary::load_groups("", false, false, &graph_aux).unwrap();
    // "b#1#h1" appears twice on purpose: duplicates do not cause any error.
    let excluded: Vec<PathSegment> = ["a#1#h1", "b#1#h1", "b#1#h1"]
        .iter()
        .map(|&name| PathSegment::from_str(name))
        .collect();

    let path_aux = AbacusAuxilliary {
        groups,
        include_coords: None,
        exclude_coords: Some(excluded),
        order: None,
    };

    let ordered_paths = path_aux.get_path_order(&graph_aux.path_segments);
    assert_eq!(ordered_paths.len(), 4, "Expected 4 paths in the final order");
}

/// `count_groups` must count distinct group names: three paths assigned to
/// `G1`, `G1` and `G2` give two groups.
#[test]
fn test_path_auxilliary_count_groups() {
    let assign = |path: &str, group: &str| (PathSegment::from_str(path), group.to_string());
    let groups = HashMap::from([
        assign("a#1#h1", "G1"),
        assign("b#1#h1", "G1"),
        assign("c#1#h1", "G2"),
    ]);

    let path_aux = AbacusAuxilliary {
        groups,
        include_coords: None,
        exclude_coords: None,
        order: None,
    };

    assert_eq!(path_aux.count_groups(), 2, "Expected 2 unique groups");
}
}
68 changes: 68 additions & 0 deletions src/hist.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,71 @@ impl HistAuxilliary {
})
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute tolerance used for every floating-point comparison below.
    const EPSILON: f64 = 1e-10;

    /// Returns `true` when `a` and `b` are equal or differ by at most
    /// `EPSILON`.
    ///
    /// A NaN on either side yields `false` (both comparisons fail), so NaN
    /// results can never pass a test silently. The `a == b` shortcut keeps
    /// equal infinities comparing as close.
    fn almost_eq(a: f64, b: f64) -> bool {
        a == b || (a - b).abs() <= EPSILON
    }

    /// Panics unless `a` and `b` are within `EPSILON` of each other.
    #[track_caller]
    fn assert_almost_eq(a: f64, b: f64) {
        if !almost_eq(a, b) {
            panic!("Values are not almost equal: {} vs {}", a, b);
        }
    }

    /// Element-wise tolerance comparison of two sequences.
    ///
    /// Panics with `context` if the lengths differ or any pair of elements is
    /// further apart than `EPSILON`.
    #[track_caller]
    fn assert_all_almost_eq(actual: &[f64], expected: &[f64], context: &str) {
        assert_eq!(
            actual.len(),
            expected.len(),
            "{}: length mismatch (actual {:?}, expected {:?})",
            context,
            actual,
            expected
        );
        for (i, (a, e)) in actual.iter().zip(expected).enumerate() {
            assert!(
                almost_eq(*a, *e),
                "{}: index {}: {} vs {}",
                context,
                i,
                a,
                e
            );
        }
    }

    /// Plain factorial in floating point, used to build expected values.
    fn factorial(n: usize) -> f64 {
        (1..=n).fold(1.0, |acc, x| acc * x as f64)
    }

    /// The expectations below treat `choose(n, k)` as log2 of the binomial
    /// coefficient, with `0.0` returned when `k > n`.
    #[test]
    fn test_choose_function() {
        assert_almost_eq(choose(5, 0), 0.0);
        assert_almost_eq(choose(5, 5), 0.0);
        assert_almost_eq(choose(5, 1), (5.0f64).log2());
        assert_almost_eq(choose(5, 4), (5.0f64).log2());
        let expected = (factorial(5) / (factorial(2) * factorial(3))).log2();
        assert_almost_eq(choose(5, 2), expected);
        assert_almost_eq(choose(5, 6), 0.0);
    }

    #[test]
    fn test_hist_calc_growth_union() {
        let hist = Hist {
            count: CountType::Node,
            coverage: vec![0, 5, 3, 2],
        };

        let t_coverage = Threshold::Absolute(0);
        let test_growth: Vec<f64> = vec![5.666666666666667, 8.333333333333334, 10.0];
        let growth = hist.calc_growth_union(&t_coverage);
        // Compared with a tolerance: the last digits of these values depend
        // on the order of floating-point operations.
        assert_all_almost_eq(&growth, &test_growth, "Wrong growth union");
    }

    #[test]
    fn test_hist_calc_growth_core() {
        let hist = Hist {
            count: CountType::Node,
            coverage: vec![0, 5, 3, 2],
        };

        let t_coverage = Threshold::Absolute(0);
        let test_core: Vec<f64> = vec![5.666666666666666, 3.0, 2.0];
        let core = hist.calc_growth_core(&t_coverage);
        assert_all_almost_eq(&core, &test_core, "Wrong growth core");
    }

    #[test]
    fn test_hist_calc_growth_quorum() {
        let hist = Hist {
            count: CountType::Node,
            coverage: vec![0, 5, 3, 2, 3, 5, 0, 4, 2, 1],
        };

        let t_coverage = Threshold::Absolute(0);
        let t_quorum = Threshold::Relative(0.9);
        let test_growth: Vec<f64> = vec![
            11.88888888888889,
            7.027777777777777,
            4.761904761904761,
            3.4444444444444438,
            2.5952380952380953,
            2.0,
            1.5555555555555545,
            1.2222222222222217,
            1.0,
        ];
        let growth = hist.calc_growth_quorum(&t_coverage, &t_quorum);
        assert_all_almost_eq(&growth, &test_growth, "Wrong growth quorum");
    }
}
57 changes: 45 additions & 12 deletions src/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,21 +103,29 @@ pub fn parse_bed_to_path_segments<R: Read>(data: &mut BufReader<R>, use_block_in
pub fn parse_groups<R: Read>(data: &mut BufReader<R>) -> Result<Vec<(PathSegment, String)>, Error> {
let mut res: Vec<(PathSegment, String)> = Vec::new();

let reader = Csv::from_reader(data)
.delimiter(b'\t')
.flexible(true)
.has_header(false);
for (i, row) in reader.enumerate() {
let row = row.unwrap();
let mut row_it = row.bytes_columns();
let path_seg = PathSegment::from_str(str::from_utf8(row_it.next().unwrap()).unwrap());
if let Some(col) = row_it.next() {
res.push((path_seg, str::from_utf8(col).unwrap().to_string()));
} else {
let msg = format!("error in line {}: table must have two columns", i);
let mut i = 1;
let mut buf = vec![];
while data.read_until(b'\n', &mut buf).unwrap_or(0) > 0 {
//Remove new line at the end
if let Some(&last_byte) = buf.last() {
if last_byte == b'\n' || last_byte == b'\r' {
buf.pop();
}
}
let line = String::from_utf8(buf.clone()).expect(&format!("error in line {}: some character is not UTF-8",i));
let columns: Vec<&str> = line.split('\t').collect();

if columns.len() != 2 {
let msg = format!("error in line {}: table must have exactly two columns", i);
log::error!("{}", &msg);
return Err(Error::new(ErrorKind::InvalidData, msg));
}

let path_seg = PathSegment::from_str(columns[0]);
res.push((path_seg, columns[1].to_string()));

i += 1;
buf.clear();
}

Ok(res)
Expand Down Expand Up @@ -1608,5 +1616,30 @@ mod tests {
]
);
}

/// `parse_groups` on `test/test_groups.txt` must return five
/// (path segment, group) pairs in file order.
#[test]
fn test_parse_groups_with_valid_input() {
    let file_name = "test/test_groups.txt";
    let expected_segments = vec![
        PathSegment::from_str("a#0"),
        PathSegment::from_str("b#0"),
        PathSegment::from_str("c#0"),
        PathSegment::from_str("c#1"),
        PathSegment::from_str("d#0"),
    ];
    let expected_groups = vec!["G1", "G1", "G2", "G2", "G2"];

    let mut data = BufReader::new(std::fs::File::open(file_name).unwrap());
    let result = parse_groups(&mut data);
    assert!(result.is_ok(), "Expected successful group loading");

    let parsed = result.unwrap();
    assert!(!parsed.is_empty(), "Expected non-empty group assignments");
    assert_eq!(parsed.len(), 5); // number of paths == groups

    // The length was checked above, so zipping cannot hide a missing row.
    let expected = expected_segments.iter().zip(expected_groups.iter().copied());
    for ((path_seg, group), (want_seg, want_group)) in parsed.into_iter().zip(expected) {
        assert_eq!(path_seg, *want_seg);
        assert_eq!(group, want_group);
    }
}
}

0 comments on commit b377052

Please sign in to comment.