import logging
from typing import Dict, List, Optional

from .base import BaseNER


class NEREngine(BaseNER):
    """Named-entity extractor backed by a GLiNER model.

    The model is loaded once at construction time. If loading fails the
    failure is logged, ``self.model`` stays ``None`` and every extraction
    call returns an empty dict instead of raising.
    """

    # Labels requested from the model when the caller does not supply any.
    DEFAULT_LABELS = (
        "Name",
        "Designation",
        "Company",
        "Contact",
        "Address",
        "Email",
        "Link",
    )
    # Value passed as ``threshold`` to ``predict_entities`` by default.
    DEFAULT_THRESHOLD = 0.3

    def __init__(self, model_name: str = "urchade/gliner_mediumv2.1") -> None:
        """Store *model_name* and immediately try to load the model."""
        self.model_name = model_name
        self.model = None
        self._initialize_model()

    def _initialize_model(self) -> None:
        """Load the GLiNER model named by ``self.model_name``.

        Any exception raised while importing or loading is logged with its
        traceback and swallowed; ``self.model`` is then left as ``None``.
        """
        logging.info("Initializing NER model: %s", self.model_name)
        try:
            # Imported inside the try so that an import failure is handled
            # exactly like a failure of from_pretrained().
            from backup.model import GLiNER

            self.model = GLiNER.from_pretrained(self.model_name)
            logging.info("NER model '%s' loaded successfully.", self.model_name)
        except Exception as e:
            # Best-effort load: record the traceback, keep the engine usable.
            logging.exception(
                "Failed to load NER model: %s. NER extraction will be unavailable.",
                e,
            )

    def extract_entities(
        self,
        text: str,
        labels: Optional[List[str]] = None,
        *,
        threshold: float = DEFAULT_THRESHOLD,
    ) -> Dict[str, List[str]]:
        """Extract entities from *text* and group their texts by label.

        Args:
            text: Input text. A falsy value (empty string, ``None``) yields
                an empty result.
            labels: Labels to request from the model. ``None`` selects
                ``DEFAULT_LABELS``.
            threshold: Forwarded to ``self.model.predict_entities`` as its
                ``threshold`` argument.

        Returns:
            A dict with one key per requested label, each mapping to the list
            of entity texts reported for that label (possibly empty). Entities
            whose label was not requested are ignored. An empty dict is
            returned when *text* is falsy, when the model is not loaded, or
            when extraction raises.
        """
        if not text:
            logging.warning("NER: Received empty text for extraction.")
            return {}

        if self.model is None:
            logging.error("NER: Model not initialized. Skipping extraction.")
            return {}

        if labels is None:
            labels = list(self.DEFAULT_LABELS)

        logging.info(
            "NER: Extracting entities for %s characters of text.", len(text)
        )
        try:
            entities = self.model.predict_entities(
                text, labels, threshold=threshold
            )

            structured_data: Dict[str, List[str]] = {
                label: [] for label in labels
            }
            for ent in entities:
                bucket = structured_data.get(ent["label"])
                if bucket is not None:
                    bucket.append(ent["text"])

            non_empty_tags = sum(1 for v in structured_data.values() if v)
            logging.info("NER: Extracted entities for %s labels.", non_empty_tags)
            return structured_data
        except Exception as e:
            # Top-level boundary for the model call: log with traceback and
            # fall back to the empty result callers already handle.
            logging.exception("NER: Extraction pipeline crashed: %s", e)
            return {}

    def process(self, text: str) -> Dict[str, List[str]]:
        """Run ``extract_entities`` on *text* with the default labels."""
        return self.extract_entities(text)