1use super::reverse_complement;
2use super::{GappedAlignment, REF_MYCO_HSP65, SeqIdHit, align_to_ref, base_at_ref_pos, dedup_substring_same_desc, parse_multi_fasta};
3use super::{KANSASII_GASTRI_ACCS, MARINUM_ULCERANS_ACCS};
4use serde::{Deserialize, Serialize};
5use std::sync::LazyLock;
6
7const KANSASII_GASTRI_SNPS: &[(usize, u8, u8)] = &[
8 (100, b'C', b'T'),
10 (130, b'C', b'T'),
11 (148, b'G', b'A'),
12 (193, b'G', b'C'),
13 (283, b'C', b'G'),
14 (304, b'T', b'C'),
15 (349, b'G', b'C'),
16 (399, b'G', b'A'),
17];
18
19const MARINUM_ULCERANS_SNPS: &[(usize, u8, u8)] = &[
20 (20, b'C', b'T'),
22 (204, b'T', b'C'),
23 (212, b'G', b'A'),
24 (482, b'T', b'C'),
25 (500, b'G', b'C'),
26];
27
28
29fn call_kansasii_gastri_snps(ga: &GappedAlignment) -> Vec<KansasiiGastriSnpCall> {
30 KANSASII_GASTRI_SNPS
31 .iter()
32 .filter_map(|&(ref_pos, gastri_base, kansasii_base)| {
33 let query_base =
34 base_at_ref_pos(&ga.gapped_query, &ga.gapped_ref, ga.ref_start, ref_pos)?;
35 Some(KansasiiGastriSnpCall {
36 ref_pos,
37 query_base,
38 gastri_base,
39 kansasii_base,
40 })
41 })
42 .collect()
43}
44
45fn call_marinum_ulcerans_snps(ga: &GappedAlignment) -> Vec<MarinumUlceransSnpCall> {
46 MARINUM_ULCERANS_SNPS
47 .iter()
48 .filter_map(|&(ref_pos, marinum_base, ulcerans_base)| {
49 let query_base =
50 base_at_ref_pos(&ga.gapped_query, &ga.gapped_ref, ga.ref_start, ref_pos)?;
51 Some(MarinumUlceransSnpCall {
52 ref_pos,
53 query_base,
54 marinum_base,
55 ulcerans_base,
56 })
57 })
58 .collect()
59}
60
61static HSP65_REFS: LazyLock<Vec<(String, String, Vec<u8>)>> =
63 LazyLock::new(|| dedup_substring_same_desc(parse_multi_fasta(REF_MYCO_HSP65)));
64
65pub fn trim_hsp65_primers(seq: &[u8]) -> Vec<u8> {
72 const FWD_START: &[&[u8]] = &[b"ATGGTGTGTCCATCGCCAAG", b"GAGGACCCGTACGAGAAGATCGGCGCTGA"];
73 const FWD_END: &[&[u8]] = &[b"GAGCTCACCGAGGGTATGCG", b"CGCTGTCCACCCTGGTCGTC"];
74
75 let rc_start: Vec<Vec<u8>> = FWD_END.iter().map(|p| reverse_complement(p)).collect();
78 let rc_end: Vec<Vec<u8>> = FWD_START.iter().map(|p| reverse_complement(p)).collect();
79
80 let find_start = |p: &[u8]| seq.windows(p.len()).position(|w| w.eq_ignore_ascii_case(p));
81 let find_end = |p: &[u8]| {
82 seq.windows(p.len())
83 .rposition(|w| w.eq_ignore_ascii_case(p))
84 .map(|pos| pos + p.len())
85 };
86
87 let start = FWD_START
88 .iter()
89 .map(|p| p as &[u8])
90 .chain(rc_start.iter().map(|p| p.as_slice()))
91 .filter_map(find_start)
92 .min()
93 .unwrap_or(0);
94
95 let end = FWD_END
96 .iter()
97 .map(|p| p as &[u8])
98 .chain(rc_end.iter().map(|p| p.as_slice()))
99 .filter_map(find_end)
100 .max()
101 .unwrap_or(seq.len());
102
103 seq[start..end.min(seq.len())].to_vec()
104}
105
106pub fn identify_sequence_hsp65(query: &[u8]) -> Vec<SeqIdHit> {
109 let query = trim_hsp65_primers(query);
110 let query = query.as_slice();
111 let rc = reverse_complement(query);
112
113 let mut hits: Vec<SeqIdHit> = HSP65_REFS
114 .iter()
115 .map(|(raw_acc, description, refseq)| {
116 let accession = raw_acc
118 .rsplit_once('.')
119 .map(|(base, _)| base.to_string())
120 .unwrap_or_else(|| raw_acc.clone());
121 let fwd = align_to_ref(query, refseq);
122 let rev = align_to_ref(&rc, refseq);
123 let (ga, is_reverse) = if rev.identity > fwd.identity {
124 (rev, true)
125 } else {
126 (fwd, false)
127 };
128 let kansasii_gastri_snp_calls =
129 if KANSASII_GASTRI_ACCS.contains(&accession.as_str()) {
130 call_kansasii_gastri_snps(&ga)
131 } else {
132 vec![]
133 };
134 let marinum_ulcerans_snp_calls =
135 if MARINUM_ULCERANS_ACCS.contains(&accession.as_str()) {
136 call_marinum_ulcerans_snps(&ga)
137 } else {
138 vec![]
139 };
140 SeqIdHit {
141 accession,
142 description: description.clone(),
143 identity: ga.identity,
144 is_reverse,
145 kansasii_gastri_snp_calls,
146 marinum_ulcerans_snp_calls,
147 rrl_snp_calls: vec![],
148 rrs_snp_calls: vec![],
149 erm41_snp_calls: vec![],
150 pnca_snp_calls: vec![],
151 aligned_query: ga.gapped_query,
152 aligned_ref: ga.gapped_ref,
153 ref_start: ga.ref_start,
154 erm41_position_28_opt: None,
155 rrl_position_2058_2059_opt: None,
156 }
157 })
158 .collect();
159
160 hits.sort_by(|a, b| {
161 b.identity
162 .partial_cmp(&a.identity)
163 .unwrap_or(std::cmp::Ordering::Equal)
164 });
165 hits
166}
167
168
169#[derive(Clone, Debug, Serialize, Deserialize)]
171pub struct KansasiiGastriSnpCall {
172 pub ref_pos: usize,
174 pub query_base: u8,
176 pub gastri_base: u8,
178 pub kansasii_base: u8,
180}
181
182impl KansasiiGastriSnpCall {
183 pub fn is_gastri(&self) -> bool {
184 self.query_base == self.gastri_base
185 }
186 pub fn is_kansasii(&self) -> bool {
187 self.query_base == self.kansasii_base
188 }
189 pub fn species_tag(&self) -> &'static str {
191 if self.is_gastri() {
192 "M. gastri"
193 } else if self.is_kansasii() {
194 "M. kansasii"
195 } else {
196 "?"
197 }
198 }
199}
200
201#[derive(Clone, Debug, Serialize, Deserialize)]
203pub struct MarinumUlceransSnpCall {
204 pub ref_pos: usize,
206 pub query_base: u8,
208 pub marinum_base: u8,
210 pub ulcerans_base: u8,
212}
213
214impl MarinumUlceransSnpCall {
215 pub fn is_marinum(&self) -> bool {
216 self.query_base == self.marinum_base
217 }
218 pub fn is_ulcerans(&self) -> bool {
219 self.query_base == self.ulcerans_base
220 }
221 pub fn species_tag(&self) -> &'static str {
223 if self.is_marinum() {
224 "M. marinum"
225 } else if self.is_ulcerans() {
226 "M. ulcerans"
227 } else {
228 "?"
229 }
230 }
231}
232