| | """ |
| | Model definition and utilities |
| | """ |
| | from transformers import AutoModelForSequenceClassification, AutoConfig |
| | from typing import Dict, Optional |
| | import logging |
| | import torch |
| | import torch.nn as nn |
| |
|
| |
|
| | def create_model( |
| | model_name: str, |
| | num_labels: int, |
| | label2id: Dict[str, int], |
| | id2label: Dict[int, str], |
| | dropout: Optional[float] = None |
| | ): |
| | """ |
| | Create a sequence classification model with optional dropout configuration. |
| | |
| | Args: |
| | model_name: Name of the pretrained model |
| | num_labels: Number of classification labels |
| | label2id: Mapping from label names to IDs |
| | id2label: Mapping from IDs to label names |
| | dropout: Optional dropout probability for classifier head |
| | |
| | Returns: |
| | Initialized model |
| | """ |
| | config = AutoConfig.from_pretrained( |
| | model_name, |
| | num_labels=num_labels, |
| | label2id=label2id, |
| | id2label=id2label |
| | ) |
| | |
| | |
| | if dropout is not None: |
| | if hasattr(config, 'hidden_dropout_prob'): |
| | config.hidden_dropout_prob = dropout |
| | if hasattr(config, 'attention_probs_dropout_prob'): |
| | config.attention_probs_dropout_prob = dropout |
| | if hasattr(config, 'classifier_dropout'): |
| | config.classifier_dropout = dropout |
| | logging.info(f"Set model dropout to {dropout}") |
| | |
| | model = AutoModelForSequenceClassification.from_pretrained( |
| | model_name, |
| | config=config |
| | ) |
| | |
| | return model |


def apply_class_weights(
    model: nn.Module,
    class_weights: Optional[list] = None
) -> Optional[torch.Tensor]:
    """
    Build a class-weight tensor for use in a weighted loss function.

    Note that this does not modify the model itself: the returned tensor is
    intended to be passed to a weighted loss (e.g. nn.CrossEntropyLoss(weight=...))
    in the training loop.

    Args:
        model: The model the weights are intended for (currently unused)
        class_weights: List of weights for each class (must match num_labels)

    Returns:
        Weight tensor if class_weights is provided, otherwise None
    """
    if class_weights is not None:
        weights_tensor = torch.tensor(class_weights, dtype=torch.float32)
        logging.info(f"Class weights applied: {class_weights}")
        return weights_tensor
    return None
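

# One way the weight tensor above is typically consumed is shown in the sketch
# below: wrapping it in a weighted cross-entropy loss for the training loop.
# This helper is illustrative only and not part of the original module's API;
# the name `build_weighted_loss` is an assumption.
def build_weighted_loss(
    model: nn.Module,
    class_weights: Optional[list] = None
) -> nn.Module:
    """Sketch: build a CrossEntropyLoss, weighted when class_weights is given."""
    weights_tensor = apply_class_weights(model, class_weights)
    if weights_tensor is not None:
        # Keep the weights on the same device as the model so the loss can be
        # computed directly against the model's logits.
        weights_tensor = weights_tensor.to(next(model.parameters()).device)
        return nn.CrossEntropyLoss(weight=weights_tensor)
    return nn.CrossEntropyLoss()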


def get_model_size(model: nn.Module) -> float:
    """
    Calculate model size in millions of parameters.

    Args:
        model: PyTorch model

    Returns:
        Number of parameters in millions
    """
    param_size = sum(p.numel() for p in model.parameters())
    return param_size / 1e6


def get_trainable_params(model: nn.Module) -> Dict[str, int]:
    """
    Get count of trainable and non-trainable parameters.

    Args:
        model: PyTorch model

    Returns:
        Dictionary with 'trainable', 'non_trainable' and 'total' parameter counts
    """
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    return {
        'trainable': trainable,
        'total': total,
        'non_trainable': total - trainable
    }
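

# Hedged usage sketch: a quick smoke test tying the helpers above together. The
# checkpoint name and label set are assumptions made for illustration (running
# this downloads the checkpoint from the Hugging Face Hub).
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    label2id = {"negative": 0, "neutral": 1, "positive": 2}
    id2label = {v: k for k, v in label2id.items()}

    model = create_model(
        model_name="bert-base-uncased",  # assumed checkpoint for the demo
        num_labels=len(label2id),
        label2id=label2id,
        id2label=id2label,
        dropout=0.2,
    )

    weights = apply_class_weights(model, class_weights=[1.0, 2.0, 1.5])
    print(f"Model size: {get_model_size(model):.1f}M parameters")
    print(f"Parameter counts: {get_trainable_params(model)}")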