1use super::{
2 GappedAlignment, REF_MYCO_RRS, SeqIdHit, align_to_ref, base_at_ref_pos,
3 dedup_substring_same_desc, parse_multi_fasta, reverse_complement,
4};
5use serde::{Deserialize, Serialize};
6use std::collections::BTreeMap;
7use std::sync::LazyLock;
8
9type RrsSnpMap = BTreeMap<usize, (u8, BTreeMap<u8, (Vec<String>, String)>)>;
12
13#[derive(Debug, Deserialize, Clone)]
14struct ResistanceVariant {
15 #[serde(rename = "Gene")]
16 gene: String,
17 #[serde(rename = "Mutation")]
18 mutation: String,
19 #[serde(rename = "drug")]
20 drug: String,
21 #[serde(rename = "type")]
22 confers: String,
23 #[serde(rename = "E.coli-nomenclature", default)]
24 ecoli_nomenclature: String,
25}
26
27fn parse_rrs_resistance_snps(csv: &str) -> RrsSnpMap {
28 let mut rdr = csv::Reader::from_reader(csv.as_bytes());
29 let mut map: RrsSnpMap = BTreeMap::new();
30 for row in rdr.deserialize::<ResistanceVariant>() {
31 let row = row.unwrap();
32 if row.gene.trim() != "rrs" {
33 continue;
34 }
35 if row.confers.trim() != "drug_resistance" {
36 continue;
37 }
38 let drug = row.drug.trim().to_string();
39 let ecoli = row.ecoli_nomenclature.trim().to_string();
40 let m = row.mutation.trim();
41 if let Some(rest) = m.strip_prefix("n.") {
42 let digits_end = rest
43 .find(|c: char| !c.is_ascii_digit())
44 .unwrap_or(rest.len());
45 if let Ok(pos1) = rest[..digits_end].parse::<usize>() {
46 let after_pos = &rest[digits_end..];
47 if let (Some(wt), Some(alt)) = (
48 after_pos.bytes().next(),
49 after_pos
50 .strip_prefix(|_: char| true)
51 .and_then(|s| s.strip_prefix('>'))
52 .and_then(|s| s.bytes().next()),
53 ) {
54 let entry = map.entry(pos1 - 1).or_insert_with(|| (wt, BTreeMap::new()));
55 let variant = entry.1.entry(alt).or_insert_with(|| (Vec::new(), ecoli.clone()));
56 if !variant.0.contains(&drug) {
57 variant.0.push(drug);
58 }
59 }
60 }
61 }
62 }
63 map
64}
65
66static RRS_RESISTANCE_SNPS: LazyLock<BTreeMap<&'static str, RrsSnpMap>> = LazyLock::new(|| {
67 [
68 ("Mycobacterium abscessus", include_str!("../../res/sequences/ntm-db/db/Mycobacterium_abscessus/variants.csv")),
69 ("Mycobacterium avium", include_str!("../../res/sequences/ntm-db/db/Mycobacterium_avium/variants.csv")),
70 ("Mycobacterium intracellulare", include_str!("../../res/sequences/ntm-db/db/Mycobacterium_intracellulare/variants.csv")),
71 ]
72 .into_iter()
73 .map(|(species, csv)| (species, parse_rrs_resistance_snps(csv)))
74 .collect()
75});
76
77static RRS_REFS: LazyLock<Vec<(String, String, Vec<u8>)>> =
79 LazyLock::new(|| dedup_substring_same_desc(parse_multi_fasta(REF_MYCO_RRS)));
80
81#[derive(Clone, Debug, Serialize, Deserialize)]
82pub struct RrsSnpCall {
83 pub ref_pos: usize,
85 pub query_base: Option<u8>,
87 pub wt_base: u8,
89 #[serde(with = "super::serde_helpers::u8_btree_map")]
91 pub resistance_bases: BTreeMap<u8, (Vec<String>, String)>,
92}
93
94impl RrsSnpCall {
95 pub fn call_tag(&self) -> String {
96 let ecoli_prefix: Option<&str> = self.resistance_bases.values()
97 .next()
98 .map(|(_, nom)| &nom[..nom.len().saturating_sub(1)]);
99
100 match self.query_base {
101 None => "".to_string(),
102 Some(b) if b == self.wt_base => match ecoli_prefix {
103 Some(p) => format!("{} (wt, E.coli {})", b as char, p),
104 None => format!("{} (wt)", b as char),
105 },
106 Some(b) if self.resistance_bases.contains_key(&b) => {
107 let (drugs, ecoli) = &self.resistance_bases[&b];
108 format!("{} ({}, E.coli {})", b as char, drugs.join(", "), ecoli)
109 }
110 Some(b) => match ecoli_prefix {
111 Some(p) => format!("{} (mutation, E.coli {}{})", b as char, p, b as char),
112 None => format!("{} (mutation)", b as char),
113 },
114 }
115 }
116}
117
118#[derive(Debug, Clone, Default, Serialize, Deserialize)]
120pub struct RrsSusceptibilityCalls {
121 pub snp_calls: Vec<RrsSnpCall>,
122 pub is_susceptible: Option<bool>,
123 pub is_susceptible_rare: Option<bool>,
124}
125
126pub fn is_susceptible_rrs(snp_calls: &[RrsSnpCall]) -> Option<bool> {
129 if snp_calls
130 .iter()
131 .any(|c| c.query_base.is_some_and(|b| c.resistance_bases.contains_key(&b)))
132 {
133 Some(false)
134 } else {
135 None
136 }
137}
138
139pub fn is_susceptible_rrs_by_snp_calls_rare(snp_calls: &[RrsSnpCall]) -> Option<bool> {
143 is_susceptible_rrs(snp_calls)
144}
145
146fn call_rrs_snps(snps: &RrsSnpMap, ga: &GappedAlignment) -> Vec<RrsSnpCall> {
147 snps.iter()
148 .map(|(&ref_pos, (wt_base, alt_to_drugs))| {
149 let query_base =
150 base_at_ref_pos(&ga.gapped_query, &ga.gapped_ref, ga.ref_start, ref_pos);
151 RrsSnpCall {
152 ref_pos,
153 query_base,
154 wt_base: *wt_base,
155 resistance_bases: alt_to_drugs.clone(),
156 }
157 })
158 .collect()
159}
160
161pub fn identify_sequence_16s(query: &[u8]) -> Vec<SeqIdHit> {
177 let rc = reverse_complement(query);
178 let mut hits: Vec<SeqIdHit> = RRS_REFS
179 .iter()
180 .filter(|(_, _, refseq)| refseq.len() >= super::MIN_RRS_REF_LEN)
181 .map(|(accession, description, refseq)| {
182 let fwd = align_to_ref(query, refseq);
183 let rev = align_to_ref(&rc, refseq);
184 let (ga, is_reverse) = if rev.identity > fwd.identity {
185 (rev, true)
186 } else {
187 (fwd, false)
188 };
189 let rrs_snp_calls = if accession.contains(':') {
190 RRS_RESISTANCE_SNPS
191 .get(description.as_str())
192 .map(|snps| call_rrs_snps(snps, &ga))
193 .unwrap_or_default()
194 } else {
195 vec![]
196 };
197 SeqIdHit {
198 accession: accession.clone(),
199 description: description.clone(),
200 identity: ga.identity,
201 is_reverse,
202 kansasii_gastri_snp_calls: vec![],
203 marinum_ulcerans_snp_calls: vec![],
204 rrl_snp_calls: vec![],
205 rrs_snp_calls,
206 erm41_snp_calls: vec![],
207 pnca_snp_calls: vec![],
208 aligned_query: ga.gapped_query,
209 aligned_ref: ga.gapped_ref,
210 ref_start: ga.ref_start,
211 erm41_position_28_opt: None,
212 rrl_position_2058_2059_opt: None,
213 }
214 })
215 .collect();
216 hits.sort_by(|a, b| {
217 b.identity
218 .partial_cmp(&a.identity)
219 .unwrap_or(std::cmp::Ordering::Equal)
220 });
221 hits
222}