1pub mod batch;
17pub mod bed;
18pub mod erm41;
19pub mod hsp65;
20pub mod ntfy_notify;
21pub mod pnca;
22pub mod rpob;
23pub mod rrs;
24pub mod rrl;
25pub mod serde_helpers;
26pub mod tb_data;
27
28pub use batch::SampleSusceptibilityRecord;
29
30use serde::{Deserialize, Serialize};
31
32use erm41::{Erm41LofCall, Erm41Position28, Erm41SusceptibilityCalls};
33use hsp65::{KansasiiGastriSnpCall, MarinumUlceransSnpCall};
34use pnca::{PncaSnpCall, PncaSusceptibilityCalls};
35use rrl::{RrlPosition2058_2059, RrlSnpCall, RrlSusceptibilityCalls};
36use rrs::{RrsSnpCall, RrsSusceptibilityCalls};
37
38pub const MIN_SEQ_ID_IDENTITY: f32 = 80.0;
39
40const MIN_RRS_REF_LEN: usize = 1200;
41const MIN_RRL_REF_LEN: usize = 1200;
42const MIN_RPOB_REF_LEN: usize = 400;
43const MIN_PNCA_REF_LEN: usize = 500;
44
45pub const DESC_ABSCESSUS: &str = "M. abscessus subsp. abscessus";
46pub const DESC_BOLLETII: &str = "M. abscessus subsp. bolletii";
47pub const DESC_MASSILIENSE: &str = "M. abscessus subsp. massiliense";
48
49const ERM41_FWD_START: &[u8] = b"gtgtccggccaacggtcgcg";
50const ERM41_FWD_END: &[u8] = b"tggtgatcaggcggcgctga";
51const ERM41_ANCHOR_L: &[u8] = b"GCCAACGGTCGCGACGCCAG";
52const ERM41_ANCHOR_R: &[u8] = b"GGGGCTGGTATCCGCTCACT";
53
54const RRL_ANCHOR_L: &[u8] = b"CGTTACGCGCGGCAGGACGA";
55const RRL_ANCHOR_R: &[u8] = b"AGACCCCGGGACCTTCACTA";
56
57const PNCA_FWD_START: &[u8] = b"GCGTCGGTAGGCAAACTGCC";
58const PNCA_FWD_END: &[u8] = b"AGTTGGTTTGCAGCTCCTGA";
59
60const REF_ERM41_ABSCESSUS: &str =
61 include_str!("../../res/sequences/erm41/erm41_abscessus_ATCC_19977.fasta");
62const REF_ERM41_BOLLETII: &str =
63 include_str!("../../res/sequences/erm41/erm41_bolletii_CIP_108541.fasta");
64const REF_ERM41_MASSILENSE: &str =
65 include_str!("../../res/sequences/erm41/erm41_massiliense_CCUG_48898.fasta");
66
67const ACC_GASTRI: &str = "AF547836";
68const ACC_KANSASII: &str = "AF547849";
69const ACC_MARINUM: &str = "AY299134";
70const ACC_ULCERANS: &str = "AY299145";
71const KANSASII_GASTRI_ACCS: &[&str] = &[ACC_GASTRI, ACC_KANSASII];
72const MARINUM_ULCERANS_ACCS: &[&str] = &[ACC_MARINUM, ACC_ULCERANS];
73
74const REF_MYCO_RRS: &str = include_str!("../../res/sequences/myco_rrs.fasta");
76const REF_MYCO_HSP65: &str = include_str!("../../res/sequences/myco_hsp65.fasta");
78const REF_MYCO_RPOB: &str = include_str!("../../res/sequences/myco_rpob.fasta");
80const REF_MYCO_RRL: &str = include_str!("../../res/sequences/myco_rrl.fasta");
82const REF_PNCA: &str = concat!(
87 include_str!("../../res/sequences/pnca/pnca_h37rv.fasta"),
88 include_str!("../../res/sequences/pnca/pnca_bovis_AF2122_97.fasta"),
89 include_str!("../../res/sequences/pnca/pnca_canettii_CIPT_140010059.fasta"),
90);
91
92
93#[derive(Debug, Clone, Default, Serialize, Deserialize)]
95pub struct SusceptibilityCalls {
96 pub erm41: Erm41SusceptibilityCalls,
97 pub rrl: RrlSusceptibilityCalls,
98 pub rrs: RrsSusceptibilityCalls,
99 pub pnca: PncaSusceptibilityCalls,
100}
101
102impl std::fmt::Display for SusceptibilityCalls {
103 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104 let mut parts: Vec<String> = Vec::new();
105
106 let mut erm: Vec<String> = Vec::new();
107 if let Some(pos) = &self.erm41.position_28 {
108 erm.push(pos.to_string());
109 }
110 for c in &self.erm41.lof_snp_calls {
111 let tag = c.call_tag();
112 if !tag.is_empty() {
113 erm.push(tag);
114 }
115 }
116 if !erm.is_empty() {
117 parts.push(format!("{}", erm.join(", ")));
118 }
119
120 let mut rrl: Vec<String> = Vec::new();
121 if let Some(pos) = &self.rrl.position_2058_2059 {
122 rrl.push(pos.to_string());
123 }
124 for c in &self.rrl.snp_calls {
125 rrl.push(c.call_tag());
126 }
127 if !rrl.is_empty() {
128 parts.push(format!("{}", rrl.join(", ")));
129 }
130
131 let rrs: Vec<String> = self.rrs.snp_calls.iter()
132 .map(|c| c.call_tag())
133 .filter(|t| !t.is_empty())
134 .collect();
135 if !rrs.is_empty() {
136 parts.push(format!("{}", rrs.join(", ")));
137 }
138
139 let pnca: Vec<String> = self
140 .pnca
141 .snp_calls
142 .iter()
143 .filter(|c| !c.call_tag().is_empty())
144 .map(|c| format!("{} {}", c.site_label(), c.call_tag()))
145 .collect();
146 if !pnca.is_empty() {
147 parts.push(format!("{}", pnca.join(", ")));
148 }
149
150 write!(f, "{}", parts.join(" | "))
151 }
152}
153
154pub fn reverse_complement(seq: &[u8]) -> Vec<u8> {
155 seq.iter()
156 .rev()
157 .map(|&b| match b.to_ascii_uppercase() {
158 b'A' => b'T',
159 b'T' => b'A',
160 b'G' => b'C',
161 b'C' => b'G',
162 _ => b'N',
163 })
164 .collect()
165}
166
167pub fn trim_start_end<'a>(seq: &'a [u8], fwd_start: &[u8], fwd_end: &[u8]) -> &'a [u8] {
176 let rc_start: Vec<u8> = reverse_complement(fwd_end);
177 let rc_end: Vec<u8> = reverse_complement(fwd_start);
178
179 let find_start = |p: &[u8]| seq.windows(p.len()).position(|w| w.eq_ignore_ascii_case(p));
180 let find_end = |p: &[u8]| {
181 seq.windows(p.len())
182 .rposition(|w| w.eq_ignore_ascii_case(p))
183 .map(|pos| pos + p.len())
184 };
185
186 let start = [fwd_start, rc_start.as_slice()]
187 .into_iter()
188 .filter_map(find_start)
189 .min()
190 .unwrap_or(0);
191
192 let end = [fwd_end, rc_end.as_slice()]
193 .into_iter()
194 .filter_map(find_end)
195 .max()
196 .unwrap_or(seq.len());
197
198 &seq[start..end.min(seq.len())]
199}
200
201
202pub fn trim_to_min_quality<'a>(seq: &'a [u8], qual: &[u8], min_q: u8) -> Option<&'a [u8]> {
209 const WINDOW: usize = 15;
210
211 let n = seq.len().min(qual.len());
212
213 if n < WINDOW {
216 let start = (0..n).find(|&i| qual[i] >= min_q).unwrap_or(n);
217 let end = (0..n).rev().find(|&i| qual[i] >= min_q).map(|i| i + 1).unwrap_or(0);
218 return if start < end { Some(&seq[start..end]) } else { None };
219 }
220
221 let threshold = min_q as u32 * WINDOW as u32;
222
223 let start = (0..=(n - WINDOW))
225 .find(|&i| qual[i..i + WINDOW].iter().map(|&q| q as u32).sum::<u32>() >= threshold)
226 .unwrap_or(n);
227
228 let end = (WINDOW..=n)
230 .rev()
231 .find(|&e| qual[e - WINDOW..e].iter().map(|&q| q as u32).sum::<u32>() >= threshold)
232 .unwrap_or(0);
233
234 if start < end { Some(&seq[start..end]) } else { None }
235}
236
237pub(crate) fn parse_fasta_seq(fasta: &str) -> Vec<u8> {
239 fasta
240 .lines()
241 .filter(|l| !l.starts_with('>'))
242 .flat_map(|l| l.bytes().filter(|b| b.is_ascii_alphabetic()))
243 .collect()
244}
245
246fn parse_multi_fasta(fasta: &str) -> Vec<(String, String, Vec<u8>)> {
248 let mut result = Vec::new();
249 let mut cur_acc = String::new();
250 let mut cur_desc = String::new();
251 let mut cur_seq: Vec<u8> = Vec::new();
252 for line in fasta.lines() {
253 if let Some(rest) = line.strip_prefix('>') {
254 if !cur_acc.is_empty() {
255 result.push((
256 cur_acc.clone(),
257 cur_desc.clone(),
258 std::mem::take(&mut cur_seq),
259 ));
260 }
261 let mut words = rest.splitn(4, ' ');
262 cur_acc = words.next().unwrap_or("").to_string();
263 let genus = words.next().unwrap_or("");
264 let species = words.next().unwrap_or("");
265 cur_desc = format!("{} {}", genus, species).trim().to_string();
266 let mut qualifier_words = words.next().unwrap_or("").split_whitespace();
274 if let Some(qualifier @ ("variant" | "subsp." | "subspecies")) =
275 qualifier_words.next()
276 && let Some(epithet) = qualifier_words.next()
277 {
278 cur_desc = format!("{cur_desc} {qualifier} {epithet}");
279 }
280 } else {
281 cur_seq.extend(line.bytes().filter(|b| b.is_ascii_alphabetic()));
282 }
283 }
284 if !cur_acc.is_empty() {
285 result.push((cur_acc, cur_desc, cur_seq));
286 }
287 result
288}
289
290fn dedup_substring_same_desc(
295 mut entries: Vec<(String, String, Vec<u8>)>,
296) -> Vec<(String, String, Vec<u8>)> {
297 entries.sort_by_key(|(_, _, s)| std::cmp::Reverse(s.len()));
299 let upper: Vec<Vec<u8>> = entries
300 .iter()
301 .map(|(_, _, s)| s.iter().map(|b| b.to_ascii_uppercase()).collect())
302 .collect();
303 let mut keep = vec![true; entries.len()];
304 for i in 0..entries.len() {
305 if !keep[i] {
306 continue;
307 }
308 for j in (i + 1)..entries.len() {
309 if !keep[j] {
310 continue;
311 }
312 if entries[i].1 == entries[j].1
313 && upper[i].windows(upper[j].len()).any(|w| w == upper[j])
314 {
315 keep[j] = false;
316 }
317 }
318 }
319 entries
320 .into_iter()
321 .zip(keep)
322 .filter_map(|(e, k)| k.then_some(e))
323 .collect()
324}
325
326fn format_pairwise_alignment(
327 accession: &str,
328 description: &str,
329 identity: f32,
330 is_reverse: bool,
331 gapped_query: &[u8],
332 gapped_ref: &[u8],
333 ref_start: usize,
334) -> String {
335 let match_line: Vec<u8> = gapped_ref
336 .iter()
337 .zip(gapped_query.iter())
338 .map(|(&r, &q)| {
339 if r == b'-' || q == b'-' {
340 b' '
341 } else if r.eq_ignore_ascii_case(&q) {
342 b'|'
343 } else {
344 b'.'
345 }
346 })
347 .collect();
348
349 let orient = if is_reverse {
350 "Reverse Complement"
351 } else {
352 "Forward"
353 };
354 let mut out = format!(
355 "Query vs {} ({}) — {:.1}% identity\n\n",
356 accession, description, identity
357 );
358 out.push_str(&format!("Orientation: {orient}\n\n"));
359
360 let line_width = 60usize;
361 let len = gapped_ref.len();
362 let mut ref_pos = ref_start + 1;
363 let mut query_pos = 1usize;
364 for chunk_start in (0..len).step_by(line_width) {
365 let chunk_end = (chunk_start + line_width).min(len);
366 let ref_chunk = std::str::from_utf8(&gapped_ref[chunk_start..chunk_end]).unwrap_or("");
367 let match_chunk = std::str::from_utf8(&match_line[chunk_start..chunk_end]).unwrap_or("");
368 let query_chunk = std::str::from_utf8(&gapped_query[chunk_start..chunk_end]).unwrap_or("");
369
370 out.push_str(&format!("Ref {:5}: {}\n", ref_pos, ref_chunk));
371 out.push_str(&format!(" {match_chunk}\n"));
372 out.push_str(&format!("Query {:5}: {}\n\n", query_pos, query_chunk));
373
374 ref_pos += ref_chunk.bytes().filter(|&b| b != b'-').count();
375 query_pos += query_chunk.bytes().filter(|&b| b != b'-').count();
376 }
377 out
378}
379
380#[derive(Debug, Clone)]
382pub struct GappedAlignment {
383 pub identity: f32,
385 pub gapped_query: Vec<u8>,
387 pub gapped_ref: Vec<u8>,
390 pub ref_start: usize,
392}
393
394pub fn align_to_ref(query: &[u8], reference: &[u8]) -> GappedAlignment {
401 use bio::alignment::AlignmentOperation::{Del, Ins, Match, Subst, Xclip, Yclip};
402 use bio::alignment::pairwise::Aligner;
403
404 if query.is_empty() || reference.is_empty() {
405 return GappedAlignment {
406 identity: 0.0,
407 gapped_query: vec![],
408 gapped_ref: vec![],
409 ref_start: 0,
410 };
411 }
412
413 let score_fn = |a: u8, b: u8| -> i32 {
414 if a.to_ascii_uppercase() == b.to_ascii_uppercase() { 1 } else { -1 }
415 };
416 let mut aligner = Aligner::new(-5, -1, &score_fn);
417 let alignment = aligner.semiglobal(query, reference);
418
419 let mut gapped_query = Vec::new();
420 let mut gapped_ref = Vec::new();
421 let mut qi = alignment.xstart;
422 let mut ri = alignment.ystart;
423
424 for op in &alignment.operations {
425 match op {
426 Match | Subst => {
427 gapped_query.push(query[qi]);
428 gapped_ref.push(reference[ri]);
429 qi += 1;
430 ri += 1;
431 }
432 Del => {
433 gapped_query.push(b'-');
434 gapped_ref.push(reference[ri]);
435 ri += 1;
436 }
437 Ins => {
438 gapped_query.push(query[qi]);
439 gapped_ref.push(b'-');
440 qi += 1;
441 }
442 Xclip(k) => { qi += k; }
443 Yclip(k) => { ri += k; }
444 }
445 }
446
447 let ref_span = gapped_ref.iter().filter(|&&b| b != b'-').count();
448 let matches = gapped_query
449 .iter()
450 .zip(gapped_ref.iter())
451 .filter(|&(&q, &r)| q != b'-' && r != b'-' && q.eq_ignore_ascii_case(&r))
452 .count();
453 let identity = if ref_span > 0 {
454 matches as f32 / ref_span as f32 * 100.0
455 } else {
456 0.0
457 };
458
459 GappedAlignment {
460 identity,
461 gapped_query,
462 gapped_ref,
463 ref_start: alignment.ystart,
464 }
465}
466
467pub fn base_at_ref_pos(
470 gapped_query: &[u8],
471 gapped_ref: &[u8],
472 ref_start: usize,
473 ref_pos: usize,
474) -> Option<u8> {
475 if ref_pos < ref_start {
476 return None;
477 }
478 let mut current = ref_start;
479 for (&q, &r) in gapped_query.iter().zip(gapped_ref.iter()) {
480 if r != b'-' {
481 if current == ref_pos {
482 return if q == b'-' { None } else { Some(q.to_ascii_uppercase()) };
483 }
484 current += 1;
485 }
486 }
487 None
488}
489
490fn scan_window(
491 center: usize,
492 left: usize,
493 right: usize,
494 peak_locs: &[u16],
495) -> Option<(u16, u16)> {
496 let base_start = center.checked_sub(left)?;
497 let base_end = center + right;
498 if base_end >= peak_locs.len() {
499 return None;
500 }
501 let start_scan = peak_locs[base_start];
502 let end_scan = peak_locs[base_end];
503 if start_scan >= end_scan {
504 return None;
505 }
506 Some((start_scan, end_scan))
507}
508
509pub fn parse_ab1_sequence(data: &[u8]) -> Option<Vec<u8>> {
511 if data.len() < 34 || &data[0..4] != b"ABIF" {
512 return None;
513 }
514
515 let dir_count = i32::from_be_bytes(data[18..22].try_into().ok()?) as usize;
519 let dir_offset = i32::from_be_bytes(data[26..30].try_into().ok()?) as usize;
520
521 let mut pbas1: Option<Vec<u8>> = None;
522
523 for i in 0..dir_count {
524 let e = dir_offset + i * 28;
525 if e + 28 > data.len() {
526 break;
527 }
528 let tag_name = &data[e..e + 4];
529 let tag_number = i32::from_be_bytes(data[e + 4..e + 8].try_into().ok()?);
530 let num_elems = i32::from_be_bytes(data[e + 12..e + 16].try_into().ok()?) as usize;
532 let data_size = i32::from_be_bytes(data[e + 16..e + 20].try_into().ok()?) as usize;
533 let data_off = i32::from_be_bytes(data[e + 20..e + 24].try_into().ok()?) as usize;
534
535 if tag_name == b"PBAS" {
536 let offset = if data_size <= 4 { e + 20 } else { data_off };
538 if offset + num_elems <= data.len() {
539 let seq = data[offset..offset + num_elems].to_vec();
540 if tag_number == 2 {
541 return Some(seq); } else if tag_number == 1 {
543 pbas1 = Some(seq); }
545 }
546 }
547 }
548
549 pbas1
550}
551
552pub fn parse_ab1_quality(data: &[u8]) -> Option<Vec<u8>> {
555 if data.len() < 34 || &data[0..4] != b"ABIF" {
556 return None;
557 }
558
559 let dir_count = i32::from_be_bytes(data[18..22].try_into().ok()?) as usize;
560 let dir_offset = i32::from_be_bytes(data[26..30].try_into().ok()?) as usize;
561
562 let mut pcon1: Option<Vec<u8>> = None;
563
564 for i in 0..dir_count {
565 let e = dir_offset + i * 28;
566 if e + 28 > data.len() {
567 break;
568 }
569 let tag_name = &data[e..e + 4];
570 let tag_number = i32::from_be_bytes(data[e + 4..e + 8].try_into().ok()?);
571 let num_elems = i32::from_be_bytes(data[e + 12..e + 16].try_into().ok()?) as usize;
572 let data_size = i32::from_be_bytes(data[e + 16..e + 20].try_into().ok()?) as usize;
573 let data_off = i32::from_be_bytes(data[e + 20..e + 24].try_into().ok()?) as usize;
574
575 if tag_name == b"PCON" {
576 let offset = if data_size <= 4 { e + 20 } else { data_off };
577 if offset + num_elems <= data.len() {
578 let qual = data[offset..offset + num_elems].to_vec();
579 if tag_number == 2 {
580 return Some(qual); } else if tag_number == 1 {
582 pcon1 = Some(qual); }
584 }
585 }
586 }
587
588 pcon1
589}
590
591impl Ab1Channels {
592 pub fn from_bytes(data: &[u8]) -> Option<Self> {
597 if data.len() < 34 || &data[0..4] != b"ABIF" {
598 return None;
599 }
600
601 let dir_count = i32::from_be_bytes(data[18..22].try_into().ok()?) as usize;
602 let dir_offset = i32::from_be_bytes(data[26..30].try_into().ok()?) as usize;
603
604 let mut data_tags: std::collections::HashMap<i32, Vec<i16>> =
606 std::collections::HashMap::new();
607 let mut pbas1: Option<Vec<u8>> = None;
608 let mut pbas2: Option<Vec<u8>> = None;
609 let mut ploc1: Option<Vec<u16>> = None;
610 let mut ploc2: Option<Vec<u16>> = None;
611 let mut fwo: Option<[u8; 4]> = None;
612
613 for i in 0..dir_count {
614 let e = dir_offset + i * 28;
615 if e + 28 > data.len() {
616 break;
617 }
618 let tag_name = &data[e..e + 4];
619 let tag_number = i32::from_be_bytes(data[e + 4..e + 8].try_into().ok()?);
620 let num_elems = i32::from_be_bytes(data[e + 12..e + 16].try_into().ok()?) as usize;
621 let data_size = i32::from_be_bytes(data[e + 16..e + 20].try_into().ok()?) as usize;
622 let data_off = i32::from_be_bytes(data[e + 20..e + 24].try_into().ok()?) as usize;
623
624 let offset = if data_size <= 4 { e + 20 } else { data_off };
625
626 if tag_name == b"DATA" && (1..=12).contains(&tag_number) {
627 if offset + num_elems * 2 <= data.len() {
629 let values: Vec<i16> = (0..num_elems)
630 .map(|j| {
631 i16::from_be_bytes([data[offset + j * 2], data[offset + j * 2 + 1]])
632 })
633 .collect();
634 data_tags.insert(tag_number, values);
635 }
636 } else if tag_name == b"PBAS" {
637 if offset + num_elems <= data.len() {
638 let seq = data[offset..offset + num_elems].to_vec();
639 match tag_number {
640 2 => pbas2 = Some(seq),
641 1 => pbas1 = Some(seq),
642 _ => {}
643 }
644 }
645 } else if tag_name == b"PLOC" {
646 if offset + num_elems * 2 <= data.len() {
648 let locs: Vec<u16> = (0..num_elems)
649 .map(|j| {
650 u16::from_be_bytes([data[offset + j * 2], data[offset + j * 2 + 1]])
651 })
652 .collect();
653 match tag_number {
654 2 => ploc2 = Some(locs),
655 1 => ploc1 = Some(locs),
656 _ => {}
657 }
658 }
659 } else if tag_name == b"FWO_" && tag_number == 1 {
660 if offset + 4 <= data.len() {
662 let mut arr = [0u8; 4];
663 arr.copy_from_slice(&data[offset..offset + 4]);
664 fwo = Some(arr);
665 }
666 }
667 }
668
669 let channel_indices: [(i32, i32); 4] = [(9, 1), (10, 2), (11, 3), (12, 4)];
671 let channels: [Vec<i16>; 4] = channel_indices.map(|(preferred, fallback)| {
672 data_tags
673 .remove(&preferred)
674 .or_else(|| data_tags.remove(&fallback))
675 .unwrap_or_default()
676 });
677
678 let bases = pbas2.or(pbas1)?;
679 let peak_locs = ploc2.or(ploc1).unwrap_or_else(|| vec![0u16; bases.len()]);
680 let base_order = fwo.unwrap_or(*b"ACGT");
681
682 let erm41_view_state_opt = erm41::find_erm41_display_window(&bases, &peak_locs).map(
683 |(start, end, is_reverse, pos28_base_idx)| Erm41ViewState {
684 window: (start, end),
685 is_reverse,
686 pos28_base_idx,
687 },
688 );
689
690 let rrl_ntm_view_state_opt = rrl::find_rrl_ntm_display_window(&bases, &peak_locs).map(
691 |(start, end, is_reverse, snp_base_idx)| RrlNtmViewState {
692 window: (start, end),
693 is_reverse,
694 snp_base_idx,
695 },
696 );
697
698 Some(Self {
699 channels,
700 bases,
701 peak_locs,
702 base_order,
703 erm41_view_state_opt,
704 rrl_ntm_view_state_opt,
705 })
706 }
707}
708
709#[derive(Clone, Copy, Debug)]
716pub struct Erm41ViewState {
717 pub window: (u16, u16),
719 pub is_reverse: bool,
721 pub pos28_base_idx: u16,
723}
724
725#[derive(Clone, Copy, Debug)]
732pub struct RrlNtmViewState {
733 pub window: (u16, u16),
735 pub is_reverse: bool,
737 pub snp_base_idx: u16,
739}
740
741#[derive(Clone, Debug)]
743pub struct Ab1Channels {
744 pub channels: [Vec<i16>; 4],
747 pub bases: Vec<u8>,
749 pub peak_locs: Vec<u16>,
751 pub base_order: [u8; 4],
753 pub erm41_view_state_opt: Option<Erm41ViewState>,
755 pub rrl_ntm_view_state_opt: Option<RrlNtmViewState>,
757}
758
759impl Ab1Channels {
760 pub fn channel_for_base(&self, base: u8) -> Option<usize> {
762 self.base_order
763 .iter()
764 .position(|&b| b.eq_ignore_ascii_case(&base))
765 }
766}
767
768#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
770pub struct SeqIdHit {
771 pub accession: String,
773 pub description: String,
775 pub identity: f32,
777 pub is_reverse: bool,
779 pub kansasii_gastri_snp_calls: Vec<KansasiiGastriSnpCall>,
781 pub marinum_ulcerans_snp_calls: Vec<MarinumUlceransSnpCall>,
783 pub rrl_snp_calls: Vec<RrlSnpCall>,
785 pub rrs_snp_calls: Vec<RrsSnpCall>,
787 pub erm41_snp_calls: Vec<Erm41LofCall>,
789 pub pnca_snp_calls: Vec<PncaSnpCall>,
791 pub aligned_query: Vec<u8>,
793 pub aligned_ref: Vec<u8>,
795 pub ref_start: usize,
797 pub erm41_position_28_opt: Option<Erm41Position28>,
799 pub rrl_position_2058_2059_opt: Option<RrlPosition2058_2059>,
801}
802
803impl SeqIdHit {
804 pub fn format_pairwise_alignment(&self) -> String {
806 format_pairwise_alignment(
807 &self.accession,
808 &self.description,
809 self.identity,
810 self.is_reverse,
811 &self.aligned_query,
812 &self.aligned_ref,
813 self.ref_start,
814 )
815 }
816
817 pub fn is_kansasii(&self) -> bool {
818 self.accession == ACC_KANSASII
819 }
820 pub fn is_gastri(&self) -> bool {
821 self.accession == ACC_GASTRI
822 }
823 pub fn is_marinum(&self) -> bool {
824 self.accession == ACC_MARINUM
825 }
826 pub fn is_ulcerans(&self) -> bool {
827 self.accession == ACC_ULCERANS
828 }
829 pub fn kansasii_gastri_snp_species_call(&self) -> Option<&'static str> {
830 let gastri = self
831 .kansasii_gastri_snp_calls
832 .iter()
833 .filter(|c| c.is_gastri())
834 .count();
835 let kansasii = self
836 .kansasii_gastri_snp_calls
837 .iter()
838 .filter(|c| c.is_kansasii())
839 .count();
840 match gastri.cmp(&kansasii) {
841 std::cmp::Ordering::Greater => Some("M. gastri"),
842 std::cmp::Ordering::Less => Some("M. kansasii"),
843 std::cmp::Ordering::Equal => None,
844 }
845 }
846 pub fn marinum_ulcerans_snp_species_call(&self) -> Option<&'static str> {
847 let marinum = self
848 .marinum_ulcerans_snp_calls
849 .iter()
850 .filter(|c| c.is_marinum())
851 .count();
852 let ulcerans = self
853 .marinum_ulcerans_snp_calls
854 .iter()
855 .filter(|c| c.is_ulcerans())
856 .count();
857 match marinum.cmp(&ulcerans) {
858 std::cmp::Ordering::Greater => Some("M. marinum"),
859 std::cmp::Ordering::Less => Some("M. ulcerans"),
860 std::cmp::Ordering::Equal => None,
861 }
862 }
863}
864
865
866#[derive(Clone, Debug)]
873pub struct SeqData {
874 pub chromatogram_opt: Option<Ab1Channels>,
875 pub seq_id_hits: Vec<SeqIdHit>,
876 pub length: usize,
877 pub trimmed_length: usize,
878 pub trimmed_avg_quality_opt: Option<f32>,
879}
880
881const PDF_PAGE_W: f32 = 297.0; const PDF_PAGE_H: f32 = 210.0;
885const PDF_MARGIN_L: f32 = 10.0;
886const PDF_MARGIN_B: f32 = 12.0;
887const PDF_MARGIN_T: f32 = 8.0;
888const PDF_ROW_H: f32 = 5.5;
889
890const PDF_COL_X: [f32; 6] = [0.0, 24.0, 39.0, 110.0, 130.0, 200.0];
892const PDF_TABLE_W: f32 = 270.0; const PDF_COL_HEADERS: [&str; 6] = [
894 "Sample ID", "Gene", "Species", "Susceptible", "Calls", "Filename",
895];
896
897pub fn build_report_pdf(
901 records: &[batch::SampleSusceptibilityRecord],
902 report_max_age_days: u32,
903) -> Vec<u8> {
904 use printpdf::*;
905
906 let max_age = std::time::Duration::from_secs(u64::from(report_max_age_days) * 24 * 60 * 60);
907 let now = std::time::SystemTime::now();
908
909 let filtered: Vec<&batch::SampleSusceptibilityRecord> = records
910 .iter()
911 .filter(|r| r.gene.is_some() && r.identity.is_some_and(|i| i >= MIN_SEQ_ID_IDENTITY))
912 .filter(|r| {
913 r.file_created
914 .is_none_or(|created| now.duration_since(created).is_ok_and(|age| age <= max_age))
915 })
916 .collect();
917
918 let (doc, page1, layer1) = PdfDocument::new(
919 "AB1 Susceptibility Report",
920 Mm(PDF_PAGE_W),
921 Mm(PDF_PAGE_H),
922 "Layer 1",
923 );
924
925 let font = doc.add_builtin_font(BuiltinFont::Helvetica).unwrap();
926 let font_bold = doc.add_builtin_font(BuiltinFont::HelveticaBold).unwrap();
927
928 let mut layer = doc.get_page(page1).get_layer(layer1);
929 let title_y = PDF_PAGE_H - PDF_MARGIN_T - 2.0;
930 let mut y = title_y - PDF_ROW_H * 1.5;
931
932 layer.use_text(
933 format!(
934 "AB1 Susceptibility Report - {} ({} records)",
935 pdf_current_date(),
936 filtered.len()
937 ),
938 10.0_f32,
939 Mm(PDF_MARGIN_L),
940 Mm(title_y),
941 &font_bold,
942 );
943
944 pdf_hline(&layer, y + 4.0);
945 pdf_write_row(&layer, &font_bold, 7.5_f32, y, &PDF_COL_HEADERS);
946 y -= PDF_ROW_H;
947 pdf_hline(&layer, y + 4.0);
948
949 let mut layer_n = 2usize;
950
951 for rec in &filtered {
952 if y < PDF_MARGIN_B {
953 let (new_page, new_layer) =
954 doc.add_page(Mm(PDF_PAGE_W), Mm(PDF_PAGE_H), format!("Layer {layer_n}"));
955 layer_n += 1;
956 layer = doc.get_page(new_page).get_layer(new_layer);
957 y = PDF_PAGE_H - PDF_MARGIN_T - PDF_ROW_H * 1.5;
958 pdf_hline(&layer, y + 4.0);
959 pdf_write_row(&layer, &font_bold, 7.5_f32, y, &PDF_COL_HEADERS);
960 y -= PDF_ROW_H;
961 pdf_hline(&layer, y + 4.0);
962 }
963
964 let species = rec.species.as_deref().unwrap_or("");
965 let species_trunc = pdf_truncate(species, 50);
966 let calls_str = rec.susceptibility_calls.to_string();
967 let calls_trunc = pdf_truncate(&calls_str, 50);
968 let fname_trunc = pdf_truncate(&rec.file_name, 50);
969
970 let cells: [&str; 6] = [
971 rec.sample_id.as_str(),
972 rec.gene.as_deref().unwrap_or(""),
973 &species_trunc,
974 pdf_sus(rec.is_susceptible),
975 &calls_trunc,
976 &fname_trunc,
977 ];
978 pdf_write_row(&layer, &font, 7.0_f32, y, &cells);
979 y -= PDF_ROW_H;
980 pdf_hline(&layer, y + 4.0);
981 }
982
983 doc.save_to_bytes().unwrap_or_default()
984}
985
986fn pdf_write_row(
987 layer: &printpdf::PdfLayerReference,
988 font: &printpdf::IndirectFontRef,
989 size: f32,
990 y: f32,
991 cells: &[&str],
992) {
993 use printpdf::Mm;
994 for (i, text) in cells.iter().enumerate() {
995 layer.use_text(*text, size, Mm(PDF_MARGIN_L + PDF_COL_X[i]), Mm(y), font);
996 }
997}
998
999fn pdf_sus(v: Option<bool>) -> &'static str {
1000 match v {
1001 Some(true) => "S",
1002 Some(false) => "R",
1003 None => "",
1004 }
1005}
1006
1007fn pdf_hline(layer: &printpdf::PdfLayerReference, y: f32) {
1008 use printpdf::{Color, Greyscale, Line, Mm, Point};
1009 layer.set_outline_color(Color::Greyscale(Greyscale::new(0.5, None)));
1010 layer.set_outline_thickness(0.3_f32);
1011 layer.add_line(Line::from_iter(vec![
1012 (Point::new(Mm(PDF_MARGIN_L), Mm(y)), false),
1013 (Point::new(Mm(PDF_MARGIN_L + PDF_TABLE_W), Mm(y)), false),
1014 ]));
1015}
1016
1017fn pdf_truncate(s: &str, max_chars: usize) -> String {
1018 if s.len() <= max_chars {
1019 s.to_string()
1020 } else {
1021 format!("{}..", &s[..max_chars])
1022 }
1023}
1024
1025fn pdf_current_date() -> String {
1026 let secs = std::time::SystemTime::now()
1027 .duration_since(std::time::UNIX_EPOCH)
1028 .unwrap_or_default()
1029 .as_secs();
1030 let (y, m, d) = pdf_days_to_ymd((secs / 86400) as u32);
1031 format!("{y:04}-{m:02}-{d:02}")
1032}
1033
1034fn pdf_days_to_ymd(days: u32) -> (u32, u32, u32) {
1035 let jdn = days + 2_440_588;
1036 let a = jdn + 32044;
1037 let b = (4 * a + 3) / 146097;
1038 let c = a - (146097 * b) / 4;
1039 let d = (4 * c + 3) / 1461;
1040 let e = c - (1461 * d) / 4;
1041 let m = (5 * e + 2) / 153;
1042 let day = e - (153 * m + 2) / 5 + 1;
1043 let month = m + 3 - 12 * (m / 10);
1044 let year = 100 * b + d - 4800 + m / 10;
1045 (year, month, day)
1046}
1047
1048#[cfg(test)]
1049mod tests {
1050 use super::parse_multi_fasta;
1051
1052 #[test]
1053 fn test_parse_multi_fasta_distinguishes_infrasubspecific_descriptions() {
1054 let fasta = "\
1059>NC_000962.3:c2289291-2288681 Mycobacterium tuberculosis H37Rv, complete genome
1060ACGT
1061>NC_002945.4:c2277615-2277005 Mycobacterium tuberculosis variant bovis AF2122/97 chromosome, complete sequence
1062ACGT
1063>HM007606.1 Mycobacterium avium subsp. paratuberculosis strain DSM 44133 23S ribosomal RNA gene, partial sequence
1064ACGT
1065>NC_xxx:1-4 Mycobacterium abscessus rrl
1066ACGT
1067";
1068 let entries = parse_multi_fasta(fasta);
1069 let descs: Vec<&str> = entries.iter().map(|(_, d, _)| d.as_str()).collect();
1070 assert_eq!(
1071 descs,
1072 vec![
1073 "Mycobacterium tuberculosis",
1074 "Mycobacterium tuberculosis variant bovis",
1075 "Mycobacterium avium subsp. paratuberculosis",
1076 "Mycobacterium abscessus",
1080 ]
1081 );
1082 }
1083}