esteban7856 committed on
Commit
6458c3f
verified
1 Parent(s): 1ee966e

API en FastAPI para el prediagnóstico

Browse files
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ ENV PIP_NO_CACHE_DIR=1 \
4
+ HF_HOME=/data/hf \
5
+ TOKENIZERS_PARALLELISM=false \
6
+ PYTHONUNBUFFERED=1
7
+
8
+ # Paquetes de sistema mínimos para compilar wheels si hace falta
9
+ RUN apt-get update && apt-get install -y --no-install-recommends \
10
+ build-essential && rm -rf /var/lib/apt/lists/*
11
+
12
+ WORKDIR /app
13
+ COPY requirements.txt .
14
+ # CPU-only torch (importante para no exceder memoria)
15
+ RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt
16
+
17
+ # Copiamos el código
18
+ COPY app ./app
19
+ COPY model ./model
20
+
21
+ # Puerto esperado por Spaces
22
+ EXPOSE 7860
23
+
24
+ # Iniciar FastAPI
25
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
app/__pycache__/main.cpython-311.pyc ADDED
Binary file (7.05 kB). View file
 
app/main.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/main.py
2
+ from fastapi import FastAPI
3
+ from pydantic import BaseModel
4
+ import os, json, re, torch
5
+ from huggingface_hub import hf_hub_download
6
+ from transformers import AutoTokenizer
7
+ from model.model import BETO_LSTM, TOKENIZER_ID
8
+ from app.utils.synonym_dict import synonym_dict, normalize_text
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+
11
# === Application and CORS configuration ===
# The application object is created exactly once. The module previously
# instantiated FastAPI a second time after loading the model; the routes were
# then registered on that second instance, so the CORS middleware added here
# was never active.
app = FastAPI(title="Prediagnóstico Médico")

# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive; restrict allow_origins (or drop credentials) once the real
# front-end origin is known.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ===== Model location on the Hugging Face Hub =====
REPO_ID = "esteban7856/respiratorio-beto"
REVISION = "main"
MODEL_FILE = "best_model.pt"
LMAP_FILE = "label_mapping.json"
HF_TOKEN = os.getenv("HF_TOKEN")  # optional when the repo is public

# ===== Inference hyperparameters =====
MAX_LEN = 64
THRESHOLD = 0.55  # tune after validation

# ===== Download artifacts from the Hub =====
model_path = hf_hub_download(REPO_ID, MODEL_FILE, revision=REVISION, token=HF_TOKEN)
lmap_path = hf_hub_download(REPO_ID, LMAP_FILE, revision=REVISION, token=HF_TOKEN)

# The JSON file stores class ids as string keys; convert them back to ints so
# they can be indexed with the argmax of the model output.
with open(lmap_path, "r", encoding="utf-8") as f:
    id2label = {int(k): v for k, v in json.load(f).items()}
NUM_CLASSES = len(id2label)

# ===== Load tokenizer and model =====
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = BETO_LSTM(hidden_dim=256, bidirectional=True, num_classes=NUM_CLASSES, freeze_bert=True)
# NOTE(review): torch.load unpickles the checkpoint, so it must only ever be
# pointed at a trusted repository; consider weights_only=True if the file is a
# plain state dict.
state = torch.load(model_path, map_location="cpu")
model.load_state_dict(state)
model.to(device).eval()
52
+
53
# Request body accepted by POST /predict.
class InputText(BaseModel):
    # Raw text to classify; predict() reads it as the original input.
    texto: str
55
+
56
# --- Greeting / filler removal ---
# Every pattern is anchored at the start of the text. Accented and unaccented
# spellings are both accepted, and the \b after the greeting keeps it from
# matching the prefix of a longer word (e.g. "hola" inside "holanda").
GREET_PATTERNS = [
    r"^\s*hola\b[!,.\s]*",
    r"^\s*buenos d[ií]as\b[!,.\s]*",
    r"^\s*buenas tardes\b[!,.\s]*",
    r"^\s*buenas noches\b[!,.\s]*",
    r"^\s*buen d[ií]a\b[!,.\s]*",
]
_GREETING_RE = re.compile("|".join(f"(?:{pat})" for pat in GREET_PATTERNS))


def strip_greetings(text: str) -> str:
    """Return *text* lower-cased with leading greetings removed.

    Greetings are stripped repeatedly from the start of the string, so stacked
    greetings in any order ("buenas tardes, hola ...") are all removed. Runs of
    two or more whitespace characters are then collapsed to a single space and
    the result is trimmed.

    Args:
        text: Input text; may be empty.

    Returns:
        The cleaned, lower-cased text.
    """
    t = text.lower()
    while True:
        # count=1 plus the ^ anchor removes one leading greeting per pass;
        # every pattern consumes at least one character, so the loop ends.
        stripped = _GREETING_RE.sub("", t, count=1)
        if stripped == t:
            break
        t = stripped
    return re.sub(r"\s{2,}", " ", t).strip()
66
+
67
# --- Canonical symptom set (production guardrail) ---
# Literals are spelled with proper accents so they can match real input.
RESP_SYMPTOMS = {
    "fiebre", "alzas térmicas", "tos seca", "tos con expectoración", "tos productiva",
    "disnea", "dificultad para respirar", "sibilancias", "rinorrea", "congestión nasal",
    "dolor torácico", "taquipnea", "retracción intercostal", "cianosis",
    "odinofagia", "hiporexia", "somnolienta", "malestar general",
}

# One pre-compiled alternation of every canonical term plus the bare word
# "tos"; the surrounding \b make each alternative a whole-word match.
_SYMPTOM_RE = re.compile(
    r"\b(?:"
    + "|".join(re.escape(term) for term in sorted(RESP_SYMPTOMS | {"tos"}, key=len, reverse=True))
    + r")\b"
)


def contains_symptom(text: str) -> bool:
    """Return True when *text* mentions any canonical respiratory symptom.

    Matching is case-sensitive and whole-word, so callers are expected to pass
    lower-cased text (predict() passes the output of strip_greetings, which
    lower-cases its input).

    Args:
        text: Text to scan; may be empty.

    Returns:
        True if a term from RESP_SYMPTOMS, or the word "tos", occurs in *text*.
    """
    return _SYMPTOM_RE.search(text) is not None
81
+
82
# Index of the "no disease" class that the guardrail rule below treats
# specially; the classes before it are the disease classes.
# NOTE(review): assumes label_mapping.json keeps this class at index 3.
_NO_DISEASE_ID = 3


@app.post("/predict")
def predict(data: InputText):
    """Classify a free-text symptom description.

    Steps: normalise synonyms and strip greetings, tokenise, run the model,
    apply softmax, then apply a symptom guardrail:

    * If the text contains a known symptom and the model either chose the
      no-disease class or is below THRESHOLD, the best disease class is
      returned instead.
    * If the text contains no known symptom and a disease prediction is below
      THRESHOLD, the no-disease class is returned.

    The reported confidence is always the probability of the class actually
    returned (previously the no-symptom fallback reported the probability of
    the class it had just rejected).

    Args:
        data: Request body carrying the raw text in ``texto``.

    Returns:
        A dict with the original text, the processed text, the predicted label
        and its confidence rounded to three decimals.
    """
    texto_original = data.texto

    # 1) Same normalisation as in training, then drop leading greetings.
    texto_norm = normalize_text(texto_original.lower(), synonym_dict)
    texto_proc = strip_greetings(texto_norm)

    # 2) Tokenisation.
    inputs = tokenizer(
        texto_proc,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=MAX_LEN,
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # 3) Inference: the model returns logits, softmax is applied here.
    with torch.no_grad():
        logits = model(inputs["input_ids"], inputs["attention_mask"])
        probs = torch.softmax(logits, dim=1)[0].cpu()  # 1-D: one prob per class

    pmax, pred = torch.max(probs, dim=0)
    final_pred = int(pred.item())
    final_conf = float(pmax.item())

    # 4) Guardrail rule.
    if contains_symptom(texto_proc):
        if final_pred == _NO_DISEASE_ID or final_conf < THRESHOLD:
            disease_probs = probs[:_NO_DISEASE_ID]
            final_pred = int(torch.argmax(disease_probs).item())
            final_conf = float(disease_probs[final_pred].item())
    elif final_pred != _NO_DISEASE_ID and final_conf < THRESHOLD:
        final_pred = _NO_DISEASE_ID
        # Report the probability of the class we return, not the rejected one.
        final_conf = float(probs[_NO_DISEASE_ID].item())

    return {
        "texto_original": texto_original,
        "texto_normalizado": texto_proc,
        "diagnóstico": id2label[final_pred],
        "confianza": round(final_conf, 3),
    }
127
+
128
@app.get("/health")
def health():
    # Status endpoint: reports the loaded label set, the torch device in use
    # and which Hub repository/revision the artifacts were downloaded from.
    repo_info = {"id": REPO_ID, "rev": REVISION}
    payload = {"status": "ok"}
    payload["num_classes"] = NUM_CLASSES
    payload["labels"] = id2label
    payload["device"] = str(device)
    payload["repo"] = repo_info
    return payload
app/prewarm.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Pre-download every artifact the API needs so the first start is fast.

Running this module fills the local Hugging Face cache with the classifier
checkpoint, the label mapping, and the BETO encoder weights and tokenizer.
"""
import os

from huggingface_hub import hf_hub_download
from transformers import AutoModel, AutoTokenizer

from model.model import TOKENIZER_ID

REPO_ID = "esteban7856/respiratorio-beto"
REVISION = "main"
HF_TOKEN = os.getenv("HF_TOKEN")  # optional when the repo is public

# Classifier artifacts hosted in the project repository.
for filename in ("label_mapping.json", "best_model.pt"):
    hf_hub_download(REPO_ID, filename, revision=REVISION, token=HF_TOKEN)

# Warm the BETO encoder and its tokenizer; app/main.py loads the tokenizer
# with AutoTokenizer at import time, so it has to be cached as well.
AutoModel.from_pretrained(TOKENIZER_ID)
AutoTokenizer.from_pretrained(TOKENIZER_ID)
print("prewarm listo")
app/utils/__pycache__/synonym_dict.cpython-311.pyc ADDED
Binary file (3.43 kB). View file
 
app/utils/synonym_dict.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
# Maps each canonical medical term to the colloquial phrases that should be
# rewritten to it by normalize_text. Accented characters are spelled properly
# so the phrases can match real user input; the mapping itself is unchanged.
# NOTE(review): a few phrases appear under more than one term (e.g.
# "calor intenso", "cansancio"); the first term in dict order wins.
synonym_dict = {
    "rinorrea": ["mocos como agua", "agua en la nariz", "nariz mocosa", "goteo de mocos como agua"],
    "fiebre": ["temperatura alta", "calor", "alta temperatura", "calor intenso"],
    "tos seca esporadica": ["tos espontanea", "a veces tos"],
    "tos con expectoración": ["tos con flema", "tos con moco", "tos con expectoración"],
    "alzas térmicas": ["temperaturas altas", "calor intenso"],
    "piel pálida": ["piel pálida"],
    "piel y mucosas pálidas": ["mucosas pálidas"],
    "disnea": ["dificultad para respirar", "respiración rápida", "respiración difícil", "respiración dificultada"],
    "somnolienta": ["cansancio", "sueño", "agotado"],
    "cefalea": ["dolor de cabeza", "dolor de cabeza intenso", "dolor de cabeza severo", "dolor de cabeza fuerte"],
    "tos seca sin secreciones": ["tos sin flema", "tos irritativa"],
    "tos seca": ["tos seca sin secreciones"],
    "hiporexia": ["rechaza alimentos", "no quiere comer", "no quiere lactar", "no tiene apetito"],
    "disfonía": ["dificultad para hablar", "habla con dificultad", "ronco", "voz ronca"],
    "malestar general": ["malestar", "no se siente bien", "malestar generalizado"],
    "aumento de frecuencia respiratoria": ["frecuencia respiratoria aumentada", "respiración rápida", "respiración difícil"],
    "sibilancias": ["silbido al respirar", "sonido al respirar", "respiración con silbido", "resoplido", "silbido"],
    "astenica": ["sensación de debilidad", "falta de energía", "cansancio"],
    "eructos fétidos": ["eructos de mal olor", "eructos fuertes", "eructos intensos"],
    "febril": ["temperatura alta", "calor corporal"],
}
25
+
26
def normalize_text(text: str, synonym_dict: dict) -> str:
    """Rewrite colloquial symptom phrases to their canonical medical term.

    The text is lower-cased first. For every canonical term that does not
    already appear in the text as a whole word, each of its synonyms becomes a
    candidate. All candidates are then substituted in a single left-to-right
    pass, preferring the longest phrase at each position.

    The single pass matters: substituting one synonym at a time lets a short
    synonym re-match text that an earlier substitution produced (for example
    "malestar generalizado" -> "malestar general" -> "malestar general
    general"). Here, replaced text is never scanned again.

    NOTE(review): if training data was normalised with the sequential version,
    re-check the few inputs affected by that cascade.

    Args:
        text: Input text; may be empty.
        synonym_dict: Mapping of canonical term -> list of synonym phrases.

    Returns:
        The lower-cased text with synonyms replaced by canonical terms.
    """
    text = text.lower()

    # synonym (lower-cased) -> canonical term; the first term listed wins when
    # the same synonym is registered under several terms.
    canonical_by_synonym = {}
    for medical_term, synonyms in synonym_dict.items():
        # Skip terms the text already states in canonical form.
        if re.search(r"\b" + re.escape(medical_term) + r"\b", text, re.IGNORECASE):
            continue
        for synonym in synonyms:
            key = synonym.lower()
            if key != medical_term.lower():
                canonical_by_synonym.setdefault(key, medical_term)

    if not canonical_by_synonym:
        return text

    # Longest alternative first so "dolor de cabeza intenso" beats
    # "dolor de cabeza" at the same position.
    alternatives = sorted(canonical_by_synonym, key=len, reverse=True)
    pattern = re.compile(r"\b(?:" + "|".join(map(re.escape, alternatives)) + r")\b")
    # A function replacement avoids backslash-escape processing of the term.
    return pattern.sub(lambda m: canonical_by_synonym[m.group(0)], text)
model/__pycache__/model.cpython-311.pyc ADDED
Binary file (2.62 kB). View file
 
model/model.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModel
2
+ import torch.nn as nn
3
+ import torch
4
+
5
# Must match the checkpoint id used in training and in app/main.py.
TOKENIZER_ID = "dccuchile/bert-base-spanish-wwm-cased"


class BETO_LSTM(nn.Module):
    """BETO encoder followed by an LSTM and a linear classification head.

    Args:
        hidden_dim: Hidden size of the LSTM (per direction).
        num_classes: Number of output classes.
        bidirectional: Whether the LSTM runs in both directions.
        freeze_bert: If True, the encoder parameters get requires_grad=False.
        dropout: Dropout probability applied before the linear head.
    """

    def __init__(self, hidden_dim=256, num_classes=4, bidirectional=True, freeze_bert=True, dropout=0.2):
        super().__init__()
        self.bert = AutoModel.from_pretrained(TOKENIZER_ID)

        # Freeze the encoder (useful when only the head is being trained).
        if freeze_bert:
            for p in self.bert.parameters():
                p.requires_grad = False

        self.lstm = nn.LSTM(
            # Taken from the encoder config instead of a hard-coded 768 so the
            # head always matches the encoder's output width.
            input_size=self.bert.config.hidden_size,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=bidirectional,
        )
        self.dropout = nn.Dropout(dropout)
        out_dim = hidden_dim * (2 if bidirectional else 1)
        self.fc = nn.Linear(out_dim, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return raw logits (no softmax) of shape [B, num_classes].

        Args:
            input_ids: Token ids, [B, T].
            attention_mask: 1 for real tokens and 0 for padding, [B, T].
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        seq = outputs.last_hidden_state                 # [B, T, hidden]
        lstm_out, _ = self.lstm(seq)                    # [B, T, H*dir]

        # Pick the LSTM output at the last real (non-padding) token of each
        # row. The index is derived from the mask sum, which presumes padding
        # sits on the right; clamp guards rows whose mask is all zeros.
        lengths = attention_mask.sum(dim=1)             # [B]
        last_idx = (lengths - 1).clamp(min=0)           # [B]
        batch_idx = torch.arange(lstm_out.size(0), device=lstm_out.device)
        last_hidden = lstm_out[batch_idx, last_idx, :]  # [B, H*dir]

        return self.fc(self.dropout(last_hidden))       # [B, num_classes]
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ --extra-index-url https://download.pytorch.org/whl/cpu
2
+ torch==2.1.0+cpu
3
+ transformers==4.33.0
4
+ huggingface_hub==0.36.0
5
+ fastapi==0.110.0
6
+ uvicorn==0.29.0
7
+ numpy==1.26.0
8
+ scikit-learn==1.3.0
9
+ pandas==2.1.0
10
+ openpyxl==3.1.2