import os
import pandas as pd
import numpy as np
import joblib
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import config
from models import get_dl_models, PhishingDataset, FinetunedBERT

sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 11
# Shared colour palette for all report figures.
COLORS = {
    'primary': '#FF6B6B',
    'secondary': '#4ECDC4',
    'tertiary': '#45B7D1',
    'quaternary': '#FFA07A',
    'quinary': '#98D8C8',
    'bg': '#F7F7F7',
    'text': '#2C3E50'
}
# Per-model decision cutoffs: a score above the threshold is classed as phishing.
MODEL_THRESHOLDS = {
    'attention_blstm': 0.8,
    'rcnn': 0.8,
    'logistic': 0.5,
    'svm': 0.5,
    'xgboost': 0.5,
    'bert': 0.5
}
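
# Sanity sketch of how these cutoffs behave (illustrative values only):
#   scores = np.array([0.55, 0.85])
#   (scores > MODEL_THRESHOLDS['logistic']).astype(int)         # -> [1, 1]
#   (scores > MODEL_THRESHOLDS['attention_blstm']).astype(int)  # -> [0, 1]
# i.e. the deep models demand stronger evidence before flagging a URL.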

def load_sample_data(sample_fraction=0.05):
    """Load a random sample of URLs, preferring engineered test data, then
    engineered train data, then the raw labeled CSVs."""
    print(f"Loading {sample_fraction:.0%} sample from data...")
    if os.path.exists(config.ENGINEERED_TEST_FILE):
        df = pd.read_csv(config.ENGINEERED_TEST_FILE)
        print(f"Loaded test data: {len(df)} samples")
    elif os.path.exists(config.ENGINEERED_TRAIN_FILE):
        df = pd.read_csv(config.ENGINEERED_TRAIN_FILE)
        print(f"Loaded train data: {len(df)} samples")
    else:
        data_files = [
            os.path.join(config.DATA_DIR, 'url_data_labeled.csv'),
            os.path.join(config.DATA_DIR, 'data_bal - 20000.csv')
        ]
        df = None
        for file in data_files:
            if os.path.exists(file):
                df = pd.read_csv(file)
                print(f"Loaded raw data: {len(df)} samples")
                break
        if df is None:
            raise FileNotFoundError("No data file found!")
    # Take at least REPORT_SAMPLE_SIZE rows, but never more than the dataset holds.
    sample_size = max(int(len(df) * sample_fraction), config.REPORT_SAMPLE_SIZE)
    sample_size = min(sample_size, len(df))
    df_sample = df.sample(n=sample_size, random_state=42)
    print(f"Sampled {len(df_sample)} URLs for report generation")
    return df_sample
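
# Usage sketch (paths and REPORT_SAMPLE_SIZE come from config.py):
#   df = load_sample_data(sample_fraction=0.10)  # ~10% of rows, floored at REPORT_SAMPLE_SIZE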

def prepare_ml_data(df):
    """Assemble the feature frame for the classical ML models."""
    # Copy so the fillna assignments below modify our own frame rather than a
    # view of df (avoids SettingWithCopyWarning and silent no-ops).
    X = df[config.NUMERICAL_FEATURES + config.CATEGORICAL_FEATURES].copy()
    y = df['label'].values
    # Sentinel fills must match what the models saw during training.
    X.loc[:, config.NUMERICAL_FEATURES] = X.loc[:, config.NUMERICAL_FEATURES].fillna(-1)
    X.loc[:, config.CATEGORICAL_FEATURES] = X.loc[:, config.CATEGORICAL_FEATURES].fillna('N/A')
    return X, y

def prepare_dl_data(df):
    """Scale numerical features for the deep models, reusing the training-time
    scaler when it is available."""
    X = df[config.NUMERICAL_FEATURES].fillna(-1).values
    y = df['label'].values
    scaler_path = os.path.join(config.MODELS_DIR, "dl_scaler.pkl")
    if os.path.exists(scaler_path):
        scaler = joblib.load(scaler_path)
        X_scaled = scaler.transform(X)
    else:
        # Fallback: fit on the report sample itself. These statistics will differ
        # from training, so scores in this branch are only indicative.
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
    return X_scaled, y
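
# For scores to match training exactly, the training script would need to persist
# the scaler it fit, e.g. (hypothetical training-side snippet; X_train assumed):
#   scaler = StandardScaler().fit(X_train)
#   joblib.dump(scaler, os.path.join(config.MODELS_DIR, "dl_scaler.pkl"))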

def predict_ml_models(X, y):
    """Run the saved classical models and collect hard labels and probabilities."""
    predictions = {}
    scores = {}
    ml_models = ['logistic', 'svm', 'xgboost']
    for model_name in ml_models:
        model_path = os.path.join(config.MODELS_DIR, f"{model_name}.joblib")
        if not os.path.exists(model_path):
            print(f"WARNING: Model {model_name} not found, skipping...")
            continue
        print(f"Loading {model_name} model...")
        model = joblib.load(model_path)
        y_pred = model.predict(X)
        # Note: an SVC only exposes predict_proba if trained with probability=True.
        y_proba = model.predict_proba(X)[:, 1]
        predictions[model_name] = y_pred
        scores[model_name] = y_proba
        acc = accuracy_score(y, y_pred)
        print(f"  {model_name} accuracy: {acc:.4f}")
    return predictions, scores
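
# If a model lacks predict_proba (e.g. an SVC trained without probability=True),
# a min-max-scaled decision_function would be a workable stand-in (sketch only):
#   raw = model.decision_function(X)
#   y_proba = (raw - raw.min()) / (raw.max() - raw.min() + 1e-12)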

def predict_dl_models(X, y):
    """Run the saved PyTorch models, applying each model's custom threshold."""
    predictions = {}
    scores = {}
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    input_dim = X.shape[1]
    dl_models_dict = get_dl_models(input_dim)
    for model_name, model in dl_models_dict.items():
        model_path = os.path.join(config.MODELS_DIR, f"{model_name}.pt")
        if not os.path.exists(model_path):
            print(f"WARNING: Model {model_name} not found, skipping...")
            continue
        print(f"Loading {model_name} model...")
        model.load_state_dict(torch.load(model_path, map_location=device, weights_only=True))
        model.to(device)
        model.eval()
        X_tensor = torch.tensor(X, dtype=torch.float32).to(device)
        with torch.no_grad():
            outputs = model(X_tensor).cpu().numpy().flatten()
        threshold = MODEL_THRESHOLDS.get(model_name, 0.5)
        y_pred = (outputs > threshold).astype(int)
        predictions[model_name] = y_pred
        scores[model_name] = outputs
        acc = accuracy_score(y, y_pred)
        print(f"  {model_name} accuracy: {acc:.4f} (threshold: {threshold})")
        # Free GPU memory before loading the next model.
        del model, X_tensor
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    return predictions, scores
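
# This assumes each model from get_dl_models() ends in a sigmoid and returns a
# (batch, 1) tensor, so the flattened outputs are probabilities comparable to
# the 0-1 thresholds above. For very large samples, the single full-batch forward
# pass could be chunked (inside torch.no_grad()), e.g.:
#   outputs = np.concatenate([model(X_tensor[i:i + 4096]).cpu().numpy().flatten()
#                             for i in range(0, len(X_tensor), 4096)])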

def predict_bert_model(df, y):
    """Score raw URLs with the fine-tuned BERT model, batching to bound memory use."""
    bert_path = os.path.join(config.BASE_DIR, 'finetuned_bert')
    if not os.path.exists(bert_path):
        print(f"WARNING: BERT model not found at {bert_path}, skipping...")
        return None, None
    if 'url' not in df.columns:
        print("WARNING: 'url' column not found in data, skipping BERT...")
        return None, None
    try:
        print("Loading BERT model...")
        bert_model = FinetunedBERT(bert_path)
        urls = df['url'].tolist()
        batch_size = 32
        all_preds = []
        all_probas = []
        print(f"Processing {len(urls)} URLs in batches of {batch_size}...")
        for i in range(0, len(urls), batch_size):
            batch_urls = urls[i:i + batch_size]
            batch_preds = bert_model.predict(batch_urls)
            batch_probas = bert_model.predict_proba(batch_urls)[:, 1]
            all_preds.extend(batch_preds)
            all_probas.extend(batch_probas)
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
        # The fine-tuned checkpoint uses the opposite label convention, so invert
        # labels and probabilities to match this script's 1 = phishing.
        y_pred = 1 - np.array(all_preds)
        y_proba = 1 - np.array(all_probas)
        acc = accuracy_score(y, y_pred)
        print(f"  BERT accuracy: {acc:.4f}")
        return y_pred, y_proba
    except torch.cuda.OutOfMemoryError:
        print("WARNING: CUDA out of memory for BERT model, skipping...")
        print("  Try reducing batch size or use CPU by setting CUDA_VISIBLE_DEVICES=''")
        return None, None
    except Exception as e:
        print(f"WARNING: Error loading BERT model: {e}")
        return None, None
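
# FinetunedBERT (from models.py) is assumed to expose a scikit-learn-style API:
#   preds  = bert_model.predict(["http://example.com/login"])        # class ids
#   probas = bert_model.predict_proba(["http://example.com/login"])  # shape (n, 2)
# The 1 - ... inversion above maps its label convention onto 1 = phishing.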

def plot_confusion_matrices(y_true, all_predictions, save_dir):
    """Draw one confusion-matrix heatmap per model on a shared subplot grid."""
    print("\nGenerating confusion matrices...")
    n_models = len(all_predictions)
    if n_models == 0:
        print("No predictions to plot!")
        return
    cols = min(3, n_models)
    rows = (n_models + cols - 1) // cols
    fig, axes = plt.subplots(rows, cols, figsize=(6*cols, 5*rows))
    # Normalise axes to a flat array regardless of grid shape (1x1, 1xN, MxN).
    axes = np.atleast_1d(axes).flatten()
    cmap = sns.color_palette("RdYlGn_r", as_cmap=True)
    for idx, (model_name, y_pred) in enumerate(all_predictions.items()):
        ax = axes[idx]
        cm = confusion_matrix(y_true, y_pred)
        sns.heatmap(cm, annot=True, fmt='d', cmap=cmap, ax=ax,
                    cbar_kws={'label': 'Count'},
                    annot_kws={'size': 14, 'weight': 'bold'})
        ax.set_title(f'{model_name.upper()} Confusion Matrix',
                     fontsize=14, fontweight='bold', color=COLORS['text'])
        ax.set_xlabel('Predicted Label', fontsize=12, fontweight='bold')
        ax.set_ylabel('True Label', fontsize=12, fontweight='bold')
        ax.set_xticklabels(['Legitimate (0)', 'Phishing (1)'])
        ax.set_yticklabels(['Legitimate (0)', 'Phishing (1)'])
    # Delete any unused subplots in the grid.
    for idx in range(n_models, len(axes)):
        fig.delaxes(axes[idx])
    plt.tight_layout()
    save_path = os.path.join(save_dir, 'confusion_matrices.png')
    plt.savefig(save_path, dpi=300, bbox_inches='tight', facecolor='white')
    print(f"Saved confusion matrices to {save_path}")
    plt.close()

def plot_accuracy_comparison(y_true, all_predictions, save_dir):
    """Bar chart comparing accuracy across all evaluated models."""
    print("\nGenerating accuracy comparison plot...")
    if len(all_predictions) == 0:
        print("No predictions to plot!")
        return
    accuracies = {}
    for model_name, y_pred in all_predictions.items():
        acc = accuracy_score(y_true, y_pred)
        accuracies[model_name] = acc
    models = list(accuracies.keys())
    accs = list(accuracies.values())
    colors_list = [COLORS['primary'], COLORS['secondary'], COLORS['tertiary'],
                   COLORS['quaternary'], COLORS['quinary']]
    bar_colors = [colors_list[i % len(colors_list)] for i in range(len(models))]
    fig, ax = plt.subplots(figsize=(12, 7))
    bars = ax.bar(models, accs, color=bar_colors, edgecolor='black', linewidth=2, alpha=0.8)
    # Annotate each bar with its exact accuracy.
    for bar, acc in zip(bars, accs):
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + 0.01,
                f'{acc:.4f}',
                ha='center', va='bottom', fontsize=13, fontweight='bold')
    ax.set_xlabel('Models', fontsize=14, fontweight='bold', color=COLORS['text'])
    ax.set_ylabel('Accuracy', fontsize=14, fontweight='bold', color=COLORS['text'])
    ax.set_title('Model Accuracy Comparison', fontsize=18, fontweight='bold',
                 color=COLORS['text'], pad=20)
    ax.set_ylim([0, 1.1])
    ax.grid(axis='y', alpha=0.3, linestyle='--')
    ax.set_axisbelow(True)
    plt.xticks(rotation=45, ha='right', fontsize=12)
    plt.tight_layout()
    save_path = os.path.join(save_dir, 'accuracy_comparison.png')
    plt.savefig(save_path, dpi=300, bbox_inches='tight', facecolor='white')
    print(f"Saved accuracy comparison to {save_path}")
    plt.close()

def plot_score_vs_label(y_true, all_scores, save_dir):
    """Scatter raw prediction scores against sample index, coloured by true label,
    with each model's decision threshold overlaid."""
    print("\nGenerating score vs label scatter plots...")
    if len(all_scores) == 0:
        print("No scores to plot!")
        return
    n_models = len(all_scores)
    cols = min(3, n_models)
    rows = (n_models + cols - 1) // cols
    fig, axes = plt.subplots(rows, cols, figsize=(6*cols, 5*rows))
    # Normalise axes to a flat array regardless of grid shape.
    axes = np.atleast_1d(axes).flatten()
    colors_map = {0: COLORS['secondary'], 1: COLORS['primary']}
    for idx, (model_name, scores) in enumerate(all_scores.items()):
        ax = axes[idx]
        for label in [0, 1]:
            mask = y_true == label
            label_name = 'Legitimate' if label == 0 else 'Phishing'
            ax.scatter(np.where(mask)[0], scores[mask],
                       c=colors_map[label], label=label_name,
                       alpha=0.6, s=50, edgecolors='black', linewidth=0.5)
        threshold = MODEL_THRESHOLDS.get(model_name, 0.5)
        ax.axhline(y=threshold, color='red', linestyle='--', linewidth=2,
                   label=f'Threshold ({threshold})', alpha=0.7)
        ax.set_title(f'{model_name.upper()} Prediction Scores',
                     fontsize=14, fontweight='bold', color=COLORS['text'])
        ax.set_xlabel('Sample Index', fontsize=11, fontweight='bold')
        ax.set_ylabel('Prediction Score', fontsize=11, fontweight='bold')
        ax.set_ylim([-0.1, 1.1])
        ax.legend(loc='best', framealpha=0.9)
        ax.grid(True, alpha=0.3, linestyle='--')
    for idx in range(n_models, len(axes)):
        fig.delaxes(axes[idx])
    plt.tight_layout()
    save_path = os.path.join(save_dir, 'score_vs_label.png')
    plt.savefig(save_path, dpi=300, bbox_inches='tight', facecolor='white')
    print(f"Saved score vs label plots to {save_path}")
    plt.close()
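
# classification_report is imported above but not yet wired into the report. A
# minimal sketch of a per-model text summary (not called from main()):
def save_classification_reports(y_true, all_predictions, save_dir):
    """Write a plain-text classification report for each model (optional extra)."""
    save_path = os.path.join(save_dir, 'classification_reports.txt')
    with open(save_path, 'w') as f:
        for model_name, y_pred in all_predictions.items():
            f.write(f"=== {model_name} ===\n")
            f.write(classification_report(y_true, y_pred,
                                          target_names=['Legitimate', 'Phishing']))
            f.write("\n")
    print(f"Saved classification reports to {save_path}")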

def main():
    print("="*60)
    print("PHISHING DETECTION MODEL EVALUATION REPORT")
    print("="*60)
    print("\nCustom Thresholds Configuration:")
    for model, threshold in MODEL_THRESHOLDS.items():
        print(f"  • {model}: {threshold}")
    print()
    os.makedirs(config.REPORTS_DIR, exist_ok=True)
    os.makedirs(config.MODELS_DIR, exist_ok=True)
    df = load_sample_data(sample_fraction=0.05)
    all_predictions = {}
    all_scores = {}
    # Classical ML models use the raw feature frame.
    X_ml, y = prepare_ml_data(df)
    ml_preds, ml_scores = predict_ml_models(X_ml, y)
    all_predictions.update(ml_preds)
    all_scores.update(ml_scores)
    # Deep models use scaled numerical features.
    X_dl, y_dl = prepare_dl_data(df)
    dl_preds, dl_scores = predict_dl_models(X_dl, y_dl)
    all_predictions.update(dl_preds)
    all_scores.update(dl_scores)
    # BERT works directly on the raw URL strings.
    bert_pred, bert_score = predict_bert_model(df, y)
    if bert_pred is not None:
        all_predictions['bert'] = bert_pred
        all_scores['bert'] = bert_score
    if len(all_predictions) == 0:
        print("\nWARNING: No models found! Please train models first.")
        print("Run: python train_ml.py && python train_dl.py")
        return
    plot_confusion_matrices(y, all_predictions, config.REPORTS_DIR)
    plot_accuracy_comparison(y, all_predictions, config.REPORTS_DIR)
    plot_score_vs_label(y, all_scores, config.REPORTS_DIR)
    print("\n" + "="*60)
    print("REPORT GENERATION COMPLETE!")
    print(f"All visualizations saved to: {config.REPORTS_DIR}")
    print("="*60)


if __name__ == "__main__":
    main()