1use super::reverse_complement;
2use super::{RRL_ANCHOR_L, RRL_ANCHOR_R, REF_MYCO_RRL};
3use super::{GappedAlignment, SeqIdHit, align_to_ref, base_at_ref_pos, dedup_substring_same_desc, parse_multi_fasta};
4use serde::{Deserialize, Serialize};
5use std::collections::BTreeMap;
6use std::sync::LazyLock;
7
8type RrlSnpMap = BTreeMap<usize, (u8, BTreeMap<u8, (Vec<String>, String)>)>;
11
12
13#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
14pub enum RrlPosition2058_2059 {
15 SusceptibleWildtype, ResistanceConferringMutation, Undetermined,
18}
19
20
21impl std::fmt::Display for RrlPosition2058_2059 {
22 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
23 match self {
24 Self::SusceptibleWildtype => write!(f, "A2058/A2059 wildtype"),
25 Self::ResistanceConferringMutation => write!(f, "Mutation 2058/2059"),
26 Self::Undetermined => write!(f, "Undetermined"),
27 }
28 }
29}
30
31impl RrlPosition2058_2059 {
32 pub fn is_susceptible(&self) -> Option<bool> {
35 match self {
36 Self::SusceptibleWildtype => Some(true),
37 Self::ResistanceConferringMutation => Some(false),
38 Self::Undetermined => None,
39 }
40 }
41
42 fn call_position_2058_2059(read: &[u8]) -> Option<(u8, u8)> {
43 let anchor_len = RRL_ANCHOR_L.len();
44 let hit = read
45 .windows(anchor_len)
46 .position(|w| w.eq_ignore_ascii_case(RRL_ANCHOR_L))?;
47 let pos2058 = hit + anchor_len;
48 let base2058 = read.get(pos2058).copied()?.to_ascii_uppercase();
49 let pos2059 = pos2058 + 1;
50 let base2059 = read.get(pos2059).copied()?.to_ascii_uppercase();
51 let right_start = pos2059 + 1;
52 let right_end = right_start + RRL_ANCHOR_R.len();
53 if right_end > read.len() {
54 return None;
55 }
56 let right_ok = read[right_start..right_end].eq_ignore_ascii_case(RRL_ANCHOR_R);
57 if !right_ok {
58 return None;
59 }
60 Some((base2058, base2059))
61 }
62
63 fn from_bases(bases: Option<(u8, u8)>) -> Option<Self> {
64 match bases {
65 Some((b'A', b'A')) => Some(Self::SusceptibleWildtype),
66 Some((_, _)) => Some(Self::ResistanceConferringMutation),
67 None => None,
68 }
69 }
70
71 pub fn from_single_read(read: &[u8]) -> Option<Self> {
72 if let Some(call) = Self::from_bases(Self::call_position_2058_2059(read)) {
73 return Some(call);
74 }
75 let rc = reverse_complement(read);
76 Self::from_bases(Self::call_position_2058_2059(&rc))
77 }
78}
79
80pub fn is_susceptible_rrl(pos_opt: Option<&RrlPosition2058_2059>, snp_calls: &[RrlSnpCall]) -> Option<bool> {
87 let has_resistance = snp_calls
88 .iter()
89 .any(|c| c.query_base.is_some_and(|b| c.resistance_bases.contains_key(&b)));
90 if has_resistance {
91 Some(false)
92 } else {
93 pos_opt.and_then(|p| p.is_susceptible())
94 }
95}
96
97pub fn is_susceptible_rrl_by_position(pos: &RrlPosition2058_2059) -> Option<bool> {
99 pos.is_susceptible()
100}
101
102pub fn is_susceptible_rrl_by_snp_calls(snp_calls: &[RrlSnpCall]) -> Option<bool> {
105 if snp_calls
106 .iter()
107 .any(|c| c.query_base.is_some_and(|b| c.resistance_bases.contains_key(&b)))
108 {
109 Some(false)
110 } else {
111 None
112 }
113}
114
115pub fn is_susceptible_rrl_by_snp_calls_rare(pos_opt: Option<&RrlPosition2058_2059>, snp_calls: &[RrlSnpCall]) -> Option<bool> {
119 if is_susceptible_rrl_by_position(pos_opt?) != Some(true) {
120 return None;
121 }
122 is_susceptible_rrl_by_snp_calls(snp_calls)
123}
124
125#[derive(Debug, Clone, Default, Serialize, Deserialize)]
127pub struct RrlSusceptibilityCalls {
128 pub position_2058_2059: Option<RrlPosition2058_2059>,
129 pub snp_calls: Vec<RrlSnpCall>,
130 pub is_susceptible: Option<bool>,
131 pub is_susceptible_rare: Option<bool>,
132}
133
134
135
136#[derive(Debug, Deserialize, Clone)]
137struct ResistanceVariant {
138 #[serde(rename = "Gene")]
139 gene: String,
140 #[serde(rename = "Mutation")]
141 mutation: String,
142 #[serde(rename = "drug")]
143 drug: String,
144 #[serde(rename = "type")]
145 confers: String,
146 #[serde(rename = "E.coli-nomenclature", default)]
147 ecoli_nomenclature: String,
148}
149
150fn parse_rrl_resistance_snps(csv: &str) -> RrlSnpMap {
151 let mut rdr = csv::Reader::from_reader(csv.as_bytes());
152 let mut map: RrlSnpMap = BTreeMap::new();
153 for row in rdr.deserialize::<ResistanceVariant>() {
154 let row = row.unwrap();
155 if row.gene.trim() != "rrl" {
156 continue;
157 }
158 if row.confers.trim() != "drug_resistance" {
159 continue;
160 }
161 let drug = row.drug.trim().to_string();
162 let ecoli = row.ecoli_nomenclature.trim().to_string();
163 let m = row.mutation.trim();
164 if let Some(rest) = m.strip_prefix("n.") {
165 let digits_end = rest
166 .find(|c: char| !c.is_ascii_digit())
167 .unwrap_or(rest.len());
168 if let Ok(pos1) = rest[..digits_end].parse::<usize>() {
169 let after_pos = &rest[digits_end..];
170 if let (Some(wt), Some(alt)) = (
171 after_pos.bytes().next(),
172 after_pos
173 .strip_prefix(|_: char| true)
174 .and_then(|s| s.strip_prefix('>'))
175 .and_then(|s| s.bytes().next()),
176 ) {
177 let entry = map.entry(pos1 - 1).or_insert_with(|| (wt, BTreeMap::new()));
178 let variant = entry.1.entry(alt).or_insert_with(|| (Vec::new(), ecoli.clone()));
179 if !variant.0.contains(&drug) {
180 variant.0.push(drug);
181 }
182 }
183 }
184 }
185 }
186 map
187}
188
189static RRL_REFS: LazyLock<Vec<(String, String, Vec<u8>)>> =
191 LazyLock::new(|| dedup_substring_same_desc(parse_multi_fasta(REF_MYCO_RRL)));
192
193static RRL_RESISTANCE_SNPS: LazyLock<BTreeMap<&'static str, RrlSnpMap>> = LazyLock::new(|| {
194 [
195 ("Mycobacterium abscessus", include_str!("../../res/sequences/ntm-db/db/Mycobacterium_abscessus/variants.csv")),
196 ("Mycobacterium avium", include_str!("../../res/sequences/ntm-db/db/Mycobacterium_avium/variants.csv")),
197 ("Mycobacterium intracellulare", include_str!("../../res/sequences/ntm-db/db/Mycobacterium_intracellulare/variants.csv")),
198 ]
199 .into_iter()
200 .map(|(species, csv)| (species, parse_rrl_resistance_snps(csv)))
201 .collect()
202});
203
204
205#[derive(Clone, Debug, Serialize, Deserialize)]
206pub struct RrlSnpCall {
207 pub ref_pos: usize,
209 pub query_base: Option<u8>,
211 pub wt_base: u8,
213 #[serde(with = "super::serde_helpers::u8_btree_map")]
215 pub resistance_bases: BTreeMap<u8, (Vec<String>, String)>,
216}
217
218impl RrlSnpCall {
219 pub fn call_tag(&self) -> String {
220 let ecoli_prefix: Option<&str> = self.resistance_bases.values()
222 .next()
223 .map(|(_, nom)| &nom[..nom.len().saturating_sub(1)]);
224
225 match self.query_base {
226 None => "".to_string(),
227 Some(b) if b == self.wt_base => match ecoli_prefix {
228 Some(p) => format!("{} (wt, E.coli {})", b as char, p),
229 None => format!("{} (wt)", b as char),
230 },
231 Some(b) if self.resistance_bases.contains_key(&b) => {
232 let (drugs, ecoli) = &self.resistance_bases[&b];
233 format!("{} ({}, E.coli {})", b as char, drugs.join(", "), ecoli)
234 }
235 Some(b) => match ecoli_prefix {
236 Some(p) => format!("{} (mutation, E.coli {}{})", b as char, p, b as char),
237 None => format!("{} (mutation)", b as char),
238 },
239 }
240 }
241}
242
243fn call_rrl_snps(snps: &RrlSnpMap, ga: &GappedAlignment) -> Vec<RrlSnpCall> {
244 snps.iter()
245 .map(|(&ref_pos, (wt_base, alt_to_drugs))| {
246 let query_base =
247 base_at_ref_pos(&ga.gapped_query, &ga.gapped_ref, ga.ref_start, ref_pos);
248 RrlSnpCall {
249 ref_pos,
250 query_base,
251 wt_base: *wt_base,
252 resistance_bases: alt_to_drugs.clone(),
253 }
254 })
255 .collect()
256}
257
258
259pub(super) fn find_rrl_ntm_display_window(
288 bases: &[u8],
289 peak_locs: &[u16],
290) -> Option<(u16, u16, bool, u16)> {
291 const LEFT: usize = 9;
292 const RIGHT: usize = 10;
293
294 let anchor_len: u16 = RRL_ANCHOR_L.len() as u16;
295
296 if let Some(hit) = bases
297 .windows(anchor_len as usize)
298 .position(|w| w.eq_ignore_ascii_case(RRL_ANCHOR_L))
299 {
300 let snp_pos = hit + anchor_len as usize;
301 if let Some(window) = super::scan_window(snp_pos, LEFT, RIGHT, peak_locs) {
302 return Some((window.0, window.1, false, snp_pos as u16));
303 }
304 }
305
306 let rc_anchor_r: Vec<u8> = reverse_complement(RRL_ANCHOR_R);
307 if let Some(hit) = bases
308 .windows(rc_anchor_r.len())
309 .position(|w| w.eq_ignore_ascii_case(&rc_anchor_r))
310 {
311 let snp_pos_comp = hit + rc_anchor_r.len();
312 if let Some(window) = super::scan_window(snp_pos_comp, RIGHT, LEFT, peak_locs) {
313 return Some((window.0, window.1, true, snp_pos_comp as u16));
314 }
315 }
316
317 None
318}
319
320pub fn identify_sequence_rrl_ntm(query: &[u8]) -> Vec<SeqIdHit> {
323 let rc = reverse_complement(query);
324 let rrl_position_2058_2059_opt = RrlPosition2058_2059::from_single_read(query);
325
326 let mut hits: Vec<SeqIdHit> = RRL_REFS
327 .iter()
328 .filter(|(_, _, refseq)| refseq.len() >= super::MIN_RRL_REF_LEN)
329 .map(|(accession, description, refseq)| {
330 let fwd = align_to_ref(query, refseq);
331 let rev = align_to_ref(&rc, refseq);
332 let (ga, is_reverse) = if rev.identity > fwd.identity {
333 (rev, true)
334 } else {
335 (fwd, false)
336 };
337 let rrl_snp_calls = if accession.contains(':') {
338 RRL_RESISTANCE_SNPS
339 .get(description.as_str())
340 .map(|snps| call_rrl_snps(snps, &ga))
341 .unwrap_or_default()
342 } else {
343 vec![]
344 };
345 SeqIdHit {
346 accession: accession.clone(),
347 description: description.clone(),
348 identity: ga.identity,
349 is_reverse,
350 kansasii_gastri_snp_calls: vec![],
351 marinum_ulcerans_snp_calls: vec![],
352 rrl_snp_calls,
353 rrs_snp_calls: vec![],
354 erm41_snp_calls: vec![],
355 pnca_snp_calls: vec![],
356 aligned_query: ga.gapped_query,
357 aligned_ref: ga.gapped_ref,
358 ref_start: ga.ref_start,
359 erm41_position_28_opt: None,
360 rrl_position_2058_2059_opt,
361 }
362 })
363 .collect();
364
365 hits.sort_by(|a, b| {
366 b.identity
367 .partial_cmp(&a.identity)
368 .unwrap_or(std::cmp::Ordering::Equal)
369 });
370 hits
371}