cosmic_files/sequencing/
tb_data.rs

1use serde::Deserialize;
2use std::collections::HashMap;
3use std::sync::LazyLock;
4
/// Maps a WHO-catalogue confidence label to a numeric rank, where 0 is the strongest
/// evidence of resistance and 4 the strongest evidence against it.
///
/// The comparison is exact (case- and whitespace-sensitive). Any label that is not one of
/// the five known strings yields 5, the "unknown" rank. Shared across modules so every
/// gene's susceptibility call uses the same scale.
pub(crate) fn confidence_rank(conf: &str) -> u8 {
    // Known labels paired with their rank, strongest resistance evidence first.
    const RANKED_LABELS: [(&str, u8); 5] = [
        ("Assoc w R", 0),
        ("Assoc w R - Interim", 1),
        ("Uncertain significance", 2),
        ("Not assoc w R - Interim", 3),
        ("Not assoc w R", 4),
    ];

    RANKED_LABELS
        .iter()
        .find(|(label, _)| *label == conf)
        // Unrecognised labels fall back to the weakest / unknown rank.
        .map_or(5, |&(_, rank)| rank)
}
17#[derive(Debug, Deserialize, Clone)]
18pub struct TbProfilerJson {
19    pub pipeline: Pipeline,
20    #[serde(default)]
21    pub dr_variants: Vec<DrVariant>,
22}
23
24impl std::fmt::Display for TbProfilerJson {
25    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
26        f.debug_struct("TbProfilerJson")
27            .field("pipeline", &self.pipeline)
28            .field("dr_variants", &self.dr_variants)
29            .finish()
30    }
31}
32
33#[derive(Debug, Deserialize, Clone)]
34pub struct Pipeline {
35    pub db_version: DbVersion,
36}
37
38#[derive(Debug, Deserialize, Clone)]
39pub struct DbVersion {
40    pub name: String,
41    pub commit: String,
42    #[serde(rename = "db-schema-version", default)]
43    pub db_schema_version: Option<String>,
44    #[serde(rename = "tb-profiler-version")]
45    pub tb_profiler_version: String,
46}
47
48#[derive(Debug, Deserialize, Clone)]
49pub struct DrVariant {
50    pub gene_id: String,
51    pub gene_name: String,
52    pub change: String,
53    #[serde(default)]
54    pub drugs: Vec<Drugs>,
55}
56
57impl DrVariant {
58    pub fn highest_confidence_rank(&self) -> u8 {
59        self.drugs
60            .iter()
61            .map(|d| confidence_rank(d.confidence.as_str()))
62            .min() // strongest (lowest number)
63            .unwrap_or(5)
64    }
65    pub fn is_susceptible(&self) -> bool {
66        self.drugs
67            .iter()
68            .map(|d| confidence_rank(d.confidence.as_str()))
69            .min() // strongest (lowest rank)
70            .map(|rank| rank >= 2)
71            .unwrap_or(true) // if no drugs listed, treat as susceptible
72    }
73}
74
75#[derive(Debug, Deserialize, Clone)]
76pub struct Drugs {
77    pub drug: String,
78    pub confidence: String,
79}
80
81#[derive(Debug, Deserialize, Clone)]
82struct TBMappingRow {
83    ecoli: String,
84
85    #[serde(rename = "Mutation")]
86    mutation: String,
87    #[serde(rename = "Gene")]
88    gene: String,
89}
90
91pub static TB_ECOLI_MAPPING: LazyLock<HashMap<(String, String), String>> = LazyLock::new(|| {
92    let mut rdr =
93        csv::Reader::from_reader(include_str!("../../res/tb_ecoli_mapping.csv").as_bytes());
94    let mut map = HashMap::new();
95    for row in rdr.deserialize::<TBMappingRow>() {
96        let row = row.unwrap();
97        map.insert(
98            (row.gene.trim().to_string(), row.mutation.trim().to_string()),
99            row.ecoli.trim().to_string(),
100        );
101    }
102    map
103});