cosmic_files/sequencing/
rpob.rs1use super::{REF_MYCO_RPOB, SeqIdHit, align_to_ref, dedup_substring_same_desc, parse_multi_fasta, reverse_complement};
2use std::sync::LazyLock;
3
4static RPOB_REFS: LazyLock<Vec<(String, String, Vec<u8>)>> =
6 LazyLock::new(|| dedup_substring_same_desc(parse_multi_fasta(REF_MYCO_RPOB)));
7
8pub fn identify_sequence_rpob(query: &[u8]) -> Vec<SeqIdHit> {
9 let rc = reverse_complement(query);
10 let mut hits: Vec<SeqIdHit> = RPOB_REFS
11 .iter()
12 .filter(|(_, _, refseq)| refseq.len() >= super::MIN_RPOB_REF_LEN)
13 .map(|(accession, description, refseq)| {
14 let fwd = align_to_ref(query, refseq);
15 let rev = align_to_ref(&rc, refseq);
16 let (ga, is_reverse) = if rev.identity > fwd.identity {
17 (rev, true)
18 } else {
19 (fwd, false)
20 };
21 SeqIdHit {
22 accession: accession.clone(),
23 description: description.clone(),
24 identity: ga.identity,
25 is_reverse,
26 kansasii_gastri_snp_calls: vec![],
27 marinum_ulcerans_snp_calls: vec![],
28 rrl_snp_calls: vec![],
29 rrs_snp_calls: vec![],
30 erm41_snp_calls: vec![],
31 pnca_snp_calls: vec![],
32 aligned_query: ga.gapped_query,
33 aligned_ref: ga.gapped_ref,
34 ref_start: ga.ref_start,
35 erm41_position_28_opt: None,
36 rrl_position_2058_2059_opt: None,
37 }
38 })
39 .collect();
40 hits.sort_by(|a, b| {
41 b.identity
42 .partial_cmp(&a.identity)
43 .unwrap_or(std::cmp::Ordering::Equal)
44 });
45 hits
46}