Spaces:
Running
Running
| import logging | |
| from typing import List, Dict | |
| from .base import BaseNER | |
class NEREngine(BaseNER):
    """Named-entity extractor backed by a GLiNER model.

    The model is loaded in the constructor. Loading is best-effort: when it
    fails, the error is logged, ``self.model`` stays ``None`` and every
    extraction call returns an empty dict instead of raising.
    """

    # Labels requested from the model when the caller passes ``labels=None``.
    DEFAULT_LABELS = (
        "Name",
        "Designation",
        "Company",
        "Contact",
        "Address",
        "Email",
        "Link",
    )

    # Value forwarded as ``threshold`` to ``predict_entities`` by default.
    DEFAULT_THRESHOLD = 0.3

    def __init__(self, model_name="urchade/gliner_mediumv2.1"):
        """Store ``model_name`` and immediately try to load that model."""
        self.model_name = model_name
        self.model = None
        self._initialize_model()

    def _initialize_model(self):
        """Load the model named by ``self.model_name`` into ``self.model``.

        Any exception raised while importing or loading is logged together
        with its traceback and swallowed; ``self.model`` is then ``None``.
        """
        logging.info("Initializing NER model: %s", self.model_name)
        try:
            # Imported inside the try so that an import failure is handled
            # exactly like any other load failure.
            from backup.model import GLiNER

            self.model = GLiNER.from_pretrained(self.model_name)
        except Exception as e:
            # Reset explicitly so a failed re-initialisation never leaves a
            # previously loaded model in place.
            self.model = None
            logging.error(
                "Failed to load NER model: %s. NER extraction will be unavailable.",
                e,
                exc_info=True,
            )
        else:
            logging.info("NER model '%s' loaded successfully.", self.model_name)

    def extract_entities(
        self,
        text: str,
        labels: List[str] = None,
        threshold: float = DEFAULT_THRESHOLD,
    ) -> Dict[str, List[str]]:
        """Extract entities from ``text`` and group their text by label.

        Args:
            text: Input to run the model on. Falsy input is not processed.
            labels: Labels to request. ``None`` selects ``DEFAULT_LABELS``.
            threshold: Forwarded unchanged to the model's
                ``predict_entities`` call; defaults to ``DEFAULT_THRESHOLD``.

        Returns:
            A dict with one key per requested label, each mapping to the list
            of entity texts the model reported for that label (possibly
            empty). An empty dict is returned when ``text`` is falsy, when no
            model is loaded, or when extraction raises.
        """
        if not text:
            logging.warning("NER: Received empty text for extraction.")
            return {}

        # Compare against None rather than relying on the model's truthiness,
        # which is not guaranteed to be True for a loaded model object.
        if self.model is None:
            logging.error("NER: Model not initialized. Skipping extraction.")
            return {}

        if labels is None:
            labels = list(self.DEFAULT_LABELS)

        logging.info("NER: Extracting entities for %d characters of text.", len(text))

        try:
            entities = self.model.predict_entities(text, labels, threshold=threshold)

            structured_data = {label: [] for label in labels}
            for ent in entities:
                label = ent["label"]
                # Ignore any entity whose label was not requested.
                if label in structured_data:
                    structured_data[label].append(ent["text"])

            non_empty_tags = sum(1 for v in structured_data.values() if v)
            logging.info("NER: Extracted entities for %d labels.", non_empty_tags)
            return structured_data
        except Exception as e:
            # Best-effort boundary: report the failure with its traceback and
            # fall back to an empty result so callers keep running.
            logging.error("NER: Extraction pipeline crashed: %s", e, exc_info=True)
            return {}

    def process(self, text: str) -> Dict[str, List[str]]:
        """Run ``extract_entities`` on ``text`` with the default settings."""
        return self.extract_entities(text)