cosmic_files/sequencing/
rpob.rs

1use super::{REF_MYCO_RPOB, SeqIdHit, align_to_ref, dedup_substring_same_desc, parse_multi_fasta, reverse_complement};
2use std::sync::LazyLock;
3
4/// Parsed and substring-deduplicated rpoB reference sequences, initialised once.
5static RPOB_REFS: LazyLock<Vec<(String, String, Vec<u8>)>> =
6    LazyLock::new(|| dedup_substring_same_desc(parse_multi_fasta(REF_MYCO_RPOB)));
7
8pub fn identify_sequence_rpob(query: &[u8]) -> Vec<SeqIdHit> {
9    let rc = reverse_complement(query);
10    let mut hits: Vec<SeqIdHit> = RPOB_REFS
11        .iter()
12        .filter(|(_, _, refseq)| refseq.len() >= super::MIN_RPOB_REF_LEN)
13        .map(|(accession, description, refseq)| {
14            let fwd = align_to_ref(query, refseq);
15            let rev = align_to_ref(&rc, refseq);
16            let (ga, is_reverse) = if rev.identity > fwd.identity {
17                (rev, true)
18            } else {
19                (fwd, false)
20            };
21            SeqIdHit {
22                accession: accession.clone(),
23                description: description.clone(),
24                identity: ga.identity,
25                is_reverse,
26                kansasii_gastri_snp_calls: vec![],
27                marinum_ulcerans_snp_calls: vec![],
28                rrl_snp_calls: vec![],
29                rrs_snp_calls: vec![],
30                erm41_snp_calls: vec![],
31                pnca_snp_calls: vec![],
32                aligned_query: ga.gapped_query,
33                aligned_ref: ga.gapped_ref,
34                ref_start: ga.ref_start,
35                erm41_position_28_opt: None,
36                rrl_position_2058_2059_opt: None,
37            }
38        })
39        .collect();
40    hits.sort_by(|a, b| {
41        b.identity
42            .partial_cmp(&a.identity)
43            .unwrap_or(std::cmp::Ordering::Equal)
44    });
45    hits
46}