1use super::reverse_complement;
2use super::{ERM41_ANCHOR_L, ERM41_ANCHOR_R, ERM41_FWD_END, ERM41_FWD_START};
3use super::{GappedAlignment, SeqIdHit, align_to_ref, base_at_ref_pos, parse_fasta_seq};
4use serde::{Deserialize, Serialize};
5use std::collections::BTreeMap;
6use std::sync::LazyLock;
7
8type Erm41LofSnpMap = BTreeMap<usize, (u8, BTreeMap<u8, (String, Option<String>)>)>;
11
12#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize)]
13pub enum Erm41Position28 {
14 C28, T28, G28, A28, Undetermined, }
20
21impl std::fmt::Display for Erm41Position28 {
22 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
23 match self {
24 Self::C28 => write!(f, "C28"),
25 Self::T28 => write!(f, "T28"),
26 Self::G28 => write!(f, "G28"),
27 Self::A28 => write!(f, "A28"),
28 Self::Undetermined => write!(f, "Position 28 not found"),
29 }
30 }
31}
32
33pub fn is_susceptible_erm41(pos_opt: Option<&Erm41Position28>, snp_calls: &[Erm41LofCall]) -> Option<bool> {
40 let has_lof = snp_calls
41 .iter()
42 .any(|c| c.query_base.is_some_and(|b| c.lof_alts.contains_key(&b)));
43 if has_lof {
44 Some(true)
45 } else {
46 pos_opt.and_then(|p| p.is_susceptible())
47 }
48}
49
50pub fn is_susceptible_erm41_by_lof_snps(snp_calls: &[Erm41LofCall]) -> Option<bool> {
53 if snp_calls
54 .iter()
55 .any(|c| c.query_base.is_some_and(|b| c.lof_alts.contains_key(&b)))
56 {
57 Some(true)
58 } else {
59 None
60 }
61}
62
63pub fn is_susceptible_erm41_by_position28(pos: &Erm41Position28) -> Option<bool> {
65 pos.is_susceptible()
66}
67
68#[derive(Debug, Clone, Default, Serialize, Deserialize)]
70pub struct Erm41SusceptibilityCalls {
71 pub position_28: Option<Erm41Position28>,
72 pub lof_snp_calls: Vec<Erm41LofCall>,
73 pub is_susceptible: Option<bool>,
74}
75
76impl Erm41Position28 {
77 pub fn is_susceptible(&self) -> Option<bool> {
80 match self {
81 Self::C28 | Self::G28 | Self::A28 => Some(true),
82 Self::T28 => Some(false),
83 Self::Undetermined => None,
84 }
85 }
86
87 fn call_position28(read: &[u8]) -> Option<u8> {
91 let anchor_len = ERM41_ANCHOR_L.len();
92 let hit = read
93 .windows(anchor_len)
94 .position(|w| w.eq_ignore_ascii_case(ERM41_ANCHOR_L))?;
95 let pos28 = hit + anchor_len;
96 let base = read.get(pos28).copied()?.to_ascii_uppercase();
97 let right_start = pos28 + 1;
98 let right_end = right_start + ERM41_ANCHOR_R.len();
99 if right_end > read.len() {
100 return None;
101 }
102 let right_ok = read[right_start..right_end].eq_ignore_ascii_case(ERM41_ANCHOR_R);
103 if !right_ok {
104 return None;
105 }
106 Some(base)
107 }
108
109 fn from_base(base: Option<u8>) -> Option<Self> {
112 match base {
113 Some(b'C') => Some(Self::C28),
114 Some(b'T') => Some(Self::T28),
115 Some(b'G') => Some(Self::G28),
116 Some(b'A') => Some(Self::A28),
117 _ => None,
118 }
119 }
120
121 pub fn from_single_read(read: &[u8]) -> Self {
125 if let Some(call) = Self::from_base(Self::call_position28(read)) {
126 return call;
127 }
128 let rc = reverse_complement(read);
129 Self::from_base(Self::call_position28(&rc)).unwrap_or(Self::Undetermined)
130 }
131}
132
133fn translate_codon(codon: &[u8]) -> u8 {
134 if codon.len() < 3 {
135 return 0;
136 }
137 let c = [
138 codon[0].to_ascii_uppercase(),
139 codon[1].to_ascii_uppercase(),
140 codon[2].to_ascii_uppercase(),
141 ];
142 match &c[..] {
143 b"TTT" | b"TTC" => b'F',
144 b"TTA" | b"TTG" | b"CTT" | b"CTC" | b"CTA" | b"CTG" => b'L',
145 b"ATT" | b"ATC" | b"ATA" => b'I',
146 b"ATG" => b'M',
147 b"GTT" | b"GTC" | b"GTA" | b"GTG" => b'V',
148 b"TCT" | b"TCC" | b"TCA" | b"TCG" | b"AGT" | b"AGC" => b'S',
149 b"CCT" | b"CCC" | b"CCA" | b"CCG" => b'P',
150 b"ACT" | b"ACC" | b"ACA" | b"ACG" => b'T',
151 b"GCT" | b"GCC" | b"GCA" | b"GCG" => b'A',
152 b"TAT" | b"TAC" => b'Y',
153 b"TAA" | b"TAG" | b"TGA" => b'*',
154 b"CAT" | b"CAC" => b'H',
155 b"CAA" | b"CAG" => b'Q',
156 b"AAT" | b"AAC" => b'N',
157 b"AAA" | b"AAG" => b'K',
158 b"GAT" | b"GAC" => b'D',
159 b"GAA" | b"GAG" => b'E',
160 b"TGT" | b"TGC" => b'C',
161 b"TGG" => b'W',
162 b"CGT" | b"CGC" | b"CGA" | b"CGG" | b"AGA" | b"AGG" => b'R',
163 b"GGT" | b"GGC" | b"GGA" | b"GGG" => b'G',
164 _ => 0,
165 }
166}
167
168fn three_letter_to_one(aa3: &str) -> u8 {
169 match aa3.to_ascii_uppercase().as_str() {
170 "ALA" => b'A',
171 "ARG" => b'R',
172 "ASN" => b'N',
173 "ASP" => b'D',
174 "CYS" => b'C',
175 "GLN" => b'Q',
176 "GLU" => b'E',
177 "GLY" => b'G',
178 "HIS" => b'H',
179 "ILE" => b'I',
180 "LEU" => b'L',
181 "LYS" => b'K',
182 "MET" => b'M',
183 "PHE" => b'F',
184 "PRO" => b'P',
185 "SER" => b'S',
186 "THR" => b'T',
187 "TRP" => b'W',
188 "TYR" => b'Y',
189 "VAL" => b'V',
190 _ => 0,
191 }
192}
193
194fn empty_string_as_none<'de, D>(de: D) -> Result<Option<String>, D::Error>
195where
196 D: serde::Deserializer<'de>,
197{
198 let s = Option::<String>::deserialize(de)?;
199 Ok(s.filter(|s| !s.trim().is_empty()))
200}
201
202#[derive(Debug, Deserialize)]
203struct Erm41LofRow {
204 #[serde(rename = "Gene")]
205 gene: String,
206 #[serde(rename = "Mutation")]
207 mutation: String,
208 #[serde(rename = "type")]
209 variant_type: String,
210 #[serde(rename = "drug", default, deserialize_with = "empty_string_as_none")]
212 drug: Option<String>,
213}
214
215fn parse_erm41_lof_snps(
227 csv: &str,
228 refseq: &[u8],
229) -> Erm41LofSnpMap {
230 let mut rdr = csv::Reader::from_reader(csv.as_bytes());
231 let mut map: Erm41LofSnpMap = BTreeMap::new();
232 for row in rdr.deserialize::<Erm41LofRow>() {
233 let row = match row {
234 Ok(r) => r,
235 Err(_) => continue,
236 };
237 if row.gene.trim() != "erm(41)" || row.variant_type.trim() != "loss_of_function" {
238 continue;
239 }
240 let m = row.mutation.trim();
241 let rest = match m.strip_prefix("p.") {
242 Some(r) if r.len() >= 4 => r,
243 _ => continue,
244 };
245 let digits_end = rest[3..]
246 .find(|c: char| !c.is_ascii_digit())
247 .map(|i| i + 3)
248 .unwrap_or(rest.len());
249 if digits_end <= 3 {
250 continue;
251 }
252 let prot_pos: usize = match rest[3..digits_end].parse() {
253 Ok(p) => p,
254 Err(_) => continue,
255 };
256 let target_str = &rest[digits_end..];
257 let target_aa = if target_str == "*" {
258 b'*'
259 } else if target_str.len() >= 3 {
260 three_letter_to_one(&target_str[..3])
261 } else {
262 continue;
263 };
264 if target_aa == 0 {
265 continue;
266 }
267 let codon_start = (prot_pos - 1) * 3;
268 if codon_start + 3 > refseq.len() {
269 continue;
270 }
271 let ref_codon = [
272 refseq[codon_start].to_ascii_uppercase(),
273 refseq[codon_start + 1].to_ascii_uppercase(),
274 refseq[codon_start + 2].to_ascii_uppercase(),
275 ];
276 for codon_pos in 0..3usize {
277 let wt_base = ref_codon[codon_pos];
278 for &alt in b"ACGT" {
279 if alt == wt_base {
280 continue;
281 }
282 let mut alt_codon = ref_codon;
283 alt_codon[codon_pos] = alt;
284 if translate_codon(&alt_codon) == target_aa {
285 let ref_pos = codon_start + codon_pos;
286 let entry = map
287 .entry(ref_pos)
288 .or_insert_with(|| (wt_base, BTreeMap::new()));
289 entry
290 .1
291 .entry(alt)
292 .or_insert_with(|| (m.to_string(), row.drug.clone()));
293 }
294 }
295 }
296 }
297 map
298}
299
300static ERM41_LOF_SNPS: LazyLock<BTreeMap<&'static str, Erm41LofSnpMap>> = LazyLock::new(|| {
301 [(
302 super::DESC_ABSCESSUS,
303 include_str!("../../res/sequences/ntm-db/db/Mycobacterium_abscessus/variants.csv"),
304 super::REF_ERM41_ABSCESSUS,
305 )]
306 .into_iter()
307 .map(|(desc, csv, fasta)| (desc, parse_erm41_lof_snps(csv, &parse_fasta_seq(fasta))))
308 .collect()
309});
310
311#[derive(Clone, Debug, Serialize, Deserialize)]
312pub struct Erm41LofCall {
313 pub ref_pos: usize,
314 pub query_base: Option<u8>,
315 pub wt_base: u8,
316 #[serde(with = "super::serde_helpers::u8_btree_map")]
318 pub lof_alts: BTreeMap<u8, (String, Option<String>)>,
319}
320
321impl Erm41LofCall {
322 pub fn call_tag(&self) -> String {
323 match self.query_base {
324 None => "".to_string(),
325 Some(b) if b == self.wt_base => String::new(),
326 Some(b) if self.lof_alts.contains_key(&b) => {
327 format!("{}{}{} ({})", self.wt_base as char, self.ref_pos, b as char, self.lof_alts[&b].0)
328 }
329 Some(b) => format!("{}{}{}", self.wt_base as char, self.ref_pos, b as char),
330 }
331 }
332}
333
334fn call_erm41_lof_snps(
340 snps: &Erm41LofSnpMap,
341 ga: &GappedAlignment,
342) -> Vec<Erm41LofCall> {
343 snps.iter()
344 .map(|(&ref_pos, (wt_base, lof_alts))| {
345 let query_base =
346 base_at_ref_pos(&ga.gapped_query, &ga.gapped_ref, ga.ref_start, ref_pos);
347 Erm41LofCall {
348 ref_pos,
349 query_base,
350 wt_base: *wt_base,
351 lof_alts: lof_alts.clone(),
352 }
353 })
354 .collect()
355}
356
357pub(super) fn find_erm41_display_window(
358 bases: &[u8],
359 peak_locs: &[u16],
360) -> Option<(u16, u16, bool, u16)> {
361 const LEFT: usize = 9;
363 const RIGHT: usize = 11;
364
365 let anchor_len: u16 = ERM41_ANCHOR_L.len() as u16;
366
367 if let Some(hit) = bases
369 .windows(anchor_len as usize)
370 .position(|w| w.eq_ignore_ascii_case(ERM41_ANCHOR_L))
371 {
372 let pos28 = hit + anchor_len as usize;
373 if let Some(window) = super::scan_window(pos28, LEFT, RIGHT, peak_locs) {
374 return Some((window.0, window.1, false, pos28 as u16));
375 }
376 }
377
378 let rc_anchor_r: Vec<u8> = reverse_complement(ERM41_ANCHOR_R);
379 if let Some(hit) = bases
380 .windows(rc_anchor_r.len())
381 .position(|w| w.eq_ignore_ascii_case(&rc_anchor_r))
382 {
383 let pos28_comp = hit + rc_anchor_r.len();
384 if let Some(window) = super::scan_window(pos28_comp, RIGHT, LEFT, peak_locs) {
385 return Some((window.0, window.1, true, pos28_comp as u16));
386 }
387 }
388
389 None
390}
391
392pub fn identify_sequence_erm41(query: &[u8]) -> Vec<SeqIdHit> {
394 let query = super::trim_start_end(query, ERM41_FWD_START, ERM41_FWD_END);
395 let rc = reverse_complement(query);
396 let erm41_position_28 = Erm41Position28::from_single_read(query);
397
398 let refs: &[(&str, &str, &str)] = &[
399 (
400 super::REF_ERM41_ABSCESSUS,
401 "REF_ERM41_ABSCESSUS",
402 super::DESC_ABSCESSUS,
403 ),
404 (
405 super::REF_ERM41_BOLLETII,
406 "REF_ERM41_BOLLETII",
407 super::DESC_BOLLETII,
408 ),
409 (
410 super::REF_ERM41_MASSILENSE,
411 "REF_ERM41_MASSILENSE",
412 super::DESC_MASSILIENSE,
413 ),
414 ];
415
416 let mut hits: Vec<SeqIdHit> = refs
417 .iter()
418 .map(|(fasta, accession, description)| {
419 let refseq = parse_fasta_seq(fasta);
420 let fwd = align_to_ref(query, &refseq);
421 let rev = align_to_ref(&rc, &refseq);
422 let (ga, is_reverse) = if rev.identity > fwd.identity {
423 (rev, true)
424 } else {
425 (fwd, false)
426 };
427 let erm41_snp_calls = ERM41_LOF_SNPS
430 .get(super::DESC_ABSCESSUS)
431 .map(|snps| call_erm41_lof_snps(snps, &ga))
432 .unwrap_or_default();
433 SeqIdHit {
434 accession: accession.to_string(),
435 description: description.to_string(),
436 identity: ga.identity,
437 is_reverse,
438 kansasii_gastri_snp_calls: vec![],
439 marinum_ulcerans_snp_calls: vec![],
440 rrl_snp_calls: vec![],
441 rrs_snp_calls: vec![],
442 erm41_snp_calls,
443 pnca_snp_calls: vec![],
444 aligned_query: ga.gapped_query,
445 aligned_ref: ga.gapped_ref,
446 ref_start: ga.ref_start,
447 erm41_position_28_opt: Some(erm41_position_28),
448 rrl_position_2058_2059_opt: None,
449 }
450 })
451 .collect();
452 hits.sort_by(|a, b| {
453 b.identity
454 .partial_cmp(&a.identity)
455 .unwrap_or(std::cmp::Ordering::Equal)
456 });
457 hits
458}