| | """ |
| | Model definition and utilities |
| | """ |
| | from transformers import AutoModelForSequenceClassification, AutoConfig |
| | from typing import Dict, Optional |
| | import logging |
| | import torch |
| | import torch.nn as nn |
| |
|
| |
|
| | def create_model( |
| | model_name: str, |
| | num_labels: int, |
| | label2id: Dict[str, int], |
| | id2label: Dict[int, str], |
| | dropout: Optional[float] = None |
| | ): |
| | """ |
| | Create a sequence classification model with optional dropout configuration. |
| | |
| | Args: |
| | model_name: Name of the pretrained model |
| | num_labels: Number of classification labels |
| | label2id: Mapping from label names to IDs |
| | id2label: Mapping from IDs to label names |
| | dropout: Optional dropout probability for classifier head |
| | |
| | Returns: |
| | Initialized model |
| | """ |
| | config = AutoConfig.from_pretrained( |
| | model_name, |
| | num_labels=num_labels, |
| | label2id=label2id, |
| | id2label=id2label |
| | ) |
| | |
| | |
| | if dropout is not None: |
| | if hasattr(config, 'hidden_dropout_prob'): |
| | config.hidden_dropout_prob = dropout |
| | if hasattr(config, 'attention_probs_dropout_prob'): |
| | config.attention_probs_dropout_prob = dropout |
| | if hasattr(config, 'classifier_dropout'): |
| | config.classifier_dropout = dropout |
| | logging.info(f"Set model dropout to {dropout}") |
| | |
| | model = AutoModelForSequenceClassification.from_pretrained( |
| | model_name, |
| | config=config |
| | ) |
| | |
| | return model |


def apply_class_weights(
    model: nn.Module,
    class_weights: Optional[list] = None
) -> Optional[torch.Tensor]:
    """
    Build a class-weight tensor for use in a weighted loss function.

    Note that this does not modify the model itself: the returned tensor is
    intended to be passed to a weighted loss (e.g. nn.CrossEntropyLoss(weight=...))
    in the training loop.

    Args:
        model: The model the weights are intended for (currently unused)
        class_weights: List of weights for each class (must match num_labels)

    Returns:
        Weight tensor if class_weights is provided, otherwise None
    """
    if class_weights is not None:
        weights_tensor = torch.tensor(class_weights, dtype=torch.float32)
        logging.info(f"Class weights applied: {class_weights}")
        return weights_tensor
    return None
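

# One way the weight tensor above is typically consumed is shown in the sketch
# below: wrapping it in a weighted cross-entropy loss for the training loop.
# This helper is illustrative only and not part of the original module's API;
# the name `build_weighted_loss` is an assumption.
def build_weighted_loss(
    model: nn.Module,
    class_weights: Optional[list] = None
) -> nn.Module:
    """Sketch: build a CrossEntropyLoss, weighted when class_weights is given."""
    weights_tensor = apply_class_weights(model, class_weights)
    if weights_tensor is not None:
        # Keep the weights on the same device as the model so the loss can be
        # computed directly against the model's logits.
        weights_tensor = weights_tensor.to(next(model.parameters()).device)
        return nn.CrossEntropyLoss(weight=weights_tensor)
    return nn.CrossEntropyLoss()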


def get_model_size(model: nn.Module) -> float:
    """
    Calculate model size in millions of parameters.

    Args:
        model: PyTorch model

    Returns:
        Number of parameters in millions
    """
    param_size = sum(p.numel() for p in model.parameters())
    return param_size / 1e6


def get_trainable_params(model: nn.Module) -> Dict[str, int]:
    """
    Get count of trainable and non-trainable parameters.

    Args:
        model: PyTorch model

    Returns:
        Dictionary with 'trainable', 'non_trainable' and 'total' parameter counts
    """
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    return {
        'trainable': trainable,
        'total': total,
        'non_trainable': total - trainable
    }
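

# Hedged usage sketch: a quick smoke test tying the helpers above together. The
# checkpoint name and label set are assumptions made for illustration (running
# this downloads the checkpoint from the Hugging Face Hub).
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    label2id = {"negative": 0, "neutral": 1, "positive": 2}
    id2label = {v: k for k, v in label2id.items()}

    model = create_model(
        model_name="bert-base-uncased",  # assumed checkpoint for the demo
        num_labels=len(label2id),
        label2id=label2id,
        id2label=id2label,
        dropout=0.2,
    )

    weights = apply_class_weights(model, class_weights=[1.0, 2.0, 1.5])
    print(f"Model size: {get_model_size(model):.1f}M parameters")
    print(f"Parameter counts: {get_trainable_params(model)}")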