Spaces:

profplate
/

ai-detection-tool

Paused

App Files Files Community

ai-detection-tool / app.py

profplate

Update app.py

bd3af38 verified about 1 month ago

raw

history blame contribute delete

11.6 kB

	"""
	AI Writing Analyzer — sentence-level heat map for human vs. AI-generated text.

	Built for classroom use. Loads a RoBERTa-based ChatGPT detector from
	Hugging Face and runs it on each sentence independently, then renders the
	input text with per-sentence color coding indicating the probability that
	the sentence was AI-generated.

	Runs comfortably on the free CPU tier.
	"""

	import re
	import html
	import gradio as gr
	import torch
	from transformers import AutoTokenizer, AutoModelForSequenceClassification

	# ---------------------------------------------------------------------------
	# Model
	# ---------------------------------------------------------------------------
	# Hello-SimpleAI's RoBERTa detector — small, CPU-friendly, widely used.
	MODEL_NAME = "Hello-SimpleAI/chatgpt-detector-roberta"

	# Load the tokenizer and classifier once, at import time, so every call to
	# the scoring function reuses the same module-level objects.
	print(f"Loading model: {MODEL_NAME}")
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
	model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
	# Inference only: put the module in evaluation mode.
	model.eval()

	# The model's label order: index 0 = Human, index 1 = ChatGPT/AI.
	# (Confirmed from the model card's id2label.)
	# NOTE(review): re-verify this index against the checkpoint's id2label if
	# MODEL_NAME is ever changed.
	AI_INDEX = 1


	# ---------------------------------------------------------------------------
	# Sentence splitting
	# ---------------------------------------------------------------------------
	# Sentence boundary: whitespace that follows '.', '!' or '?' and precedes an
	# ASCII capital letter, a quote, or an opening parenthesis/bracket.
	_SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+(?=[A-Z\"'\(\[])")


	def split_sentences(text: str):
	    """Break ``text`` into a flat list of trimmed, non-empty sentences.

	    Regex-based on purpose, so no NLTK download is needed on the free CPU
	    tier. The text is first cut at blank-line paragraph breaks, and each
	    paragraph is then cut at sentence boundaries. Blank or whitespace-only
	    input yields an empty list.
	    """
	    stripped = text.strip()
	    if not stripped:
	        return []

	    sentences = []
	    for paragraph in re.split(r"\n\s*\n", stripped):
	        # An empty paragraph splits into [""] and is dropped by the filter.
	        for fragment in _SENT_SPLIT_RE.split(paragraph.strip()):
	            fragment = fragment.strip()
	            if fragment:
	                sentences.append(fragment)
	    return sentences


	# ---------------------------------------------------------------------------
	# Scoring
	# ---------------------------------------------------------------------------
	@torch.no_grad()
	def score_sentence(sentence: str) -> float:
	    """Score a single sentence with the detector.

	    The sentence is tokenized on its own (truncated to at most 512 tokens),
	    passed through the module-level ``model`` with gradient tracking
	    disabled, and the softmax probability stored at ``AI_INDEX`` is returned
	    as a plain Python float in the range 0.0 – 1.0.
	    """
	    encoded = tokenizer(
	        sentence,
	        truncation=True,
	        max_length=512,
	        return_tensors="pt",
	    )
	    output = model(**encoded)
	    # Only this one sentence was encoded, so row 0 is its class distribution.
	    class_probs = torch.softmax(output.logits, dim=-1)[0]
	    return float(class_probs[AI_INDEX].item())


	# ---------------------------------------------------------------------------
	# Rendering
	# ---------------------------------------------------------------------------
	# Heat-map colour stops as (r, g, b) at p = 0.0, p = 0.5 and p = 1.0.
	_HEAT_LOW = (56, 189, 248)  # cool teal: likely human
	_HEAT_MID = (245, 191, 66)  # amber: uncertain
	_HEAT_HIGH = (248, 80, 80)  # warm red: likely AI


	def _heat_rgb(p: float):
	    """Return the ``(r, g, b)`` heat-map colour for probability ``p``.

	    Interpolates linearly teal -> amber over 0.0..0.5 and amber -> red over
	    0.5..1.0, truncating each channel to an int. Values outside 0..1 are
	    clamped so the result is always a valid colour; NaN is deliberately not
	    swallowed and still raises ``ValueError`` from ``int()``.
	    """
	    if p < 0.0:
	        p = 0.0
	    elif p > 1.0:
	        p = 1.0

	    if p < 0.5:
	        t = p / 0.5
	        start, end = _HEAT_LOW, _HEAT_MID
	    else:
	        t = (p - 0.5) / 0.5
	        start, end = _HEAT_MID, _HEAT_HIGH
	    return tuple(int(a + (b - a) * t) for a, b in zip(start, end))


	def prob_to_color(p: float) -> str:
	    """
	    Map probability 0..1 to a translucent background colour.
	    Low (human) -> cool teal
	    Mid -> amber
	    High (AI) -> warm red

	    Returns a CSS ``rgba(...)`` string with alpha 0.42.
	    """
	    r, g, b = _heat_rgb(p)
	    # Higher opacity for a vivid highlight; text is forced light on top.
	    return f"rgba({r}, {g}, {b}, 0.42)"


	def border_color(p: float) -> str:
	    """Return the same heat-map hue as ``prob_to_color`` at alpha 0.95.

	    Used for the underline beneath each highlighted sentence, so it shares
	    the interpolation helper to guarantee both colours stay in step.
	    """
	    r, g, b = _heat_rgb(p)
	    return f"rgba({r}, {g}, {b}, 0.95)"


	def render_heatmap(sentences, scores) -> str:
	    """Build the HTML shown in the results panel.

	    With no sentences, returns an italic placeholder prompting the user to
	    paste text. Otherwise returns a wrapper ``<div>`` holding, in order: a
	    summary card (overall verdict, average AI likelihood, sentence count),
	    the text with one highlighted ``<span>`` per sentence, and a colour
	    legend. ``sentences`` and ``scores`` are paired positionally; sentence
	    text is HTML-escaped before being embedded.
	    """
	    if not sentences:
	        return (
	            "<div style='color:#94a3b8; font-style:italic; padding:1rem;'>"
	            "Paste some writing above and click <b>Analyze</b> to see a "
	            "sentence-by-sentence breakdown.</div>"
	        )

	    def sentence_span(sentence, prob):
	        # One highlighted sentence with its percentage as a superscript badge.
	        fill = prob_to_color(prob)
	        underline = border_color(prob)
	        percent = int(round(prob * 100))
	        escaped = html.escape(sentence)
	        return (
	            f"<span class='awa-sent' title='AI likelihood: {percent}%' "
	            f"style='background:{fill} !important; "
	            f"border-bottom:2px solid {underline} !important; "
	            f"color:#f8fafc !important; "
	            f"text-shadow:0 1px 2px rgba(0,0,0,0.65); "
	            f"padding:3px 6px; margin:2px 1px; border-radius:5px; "
	            f"box-decoration-break:clone; -webkit-box-decoration-break:clone; "
	            f"line-height:2.3;'>{escaped} "
	            f"<span style='font-size:0.72em; color:#f1f5f9 !important; "
	            f"font-weight:600; vertical-align:super; "
	            f"text-shadow:0 1px 2px rgba(0,0,0,0.7);'>{percent}%</span></span>"
	        )

	    highlighted = " ".join(
	        [sentence_span(sentence, prob) for sentence, prob in zip(sentences, scores)]
	    )

	    # The overall verdict is driven by the plain mean of the sentence scores.
	    mean_score = sum(scores) / len(scores)
	    label, label_color = classify_overall(mean_score)
	    mean_percent = int(round(mean_score * 100))
	    sentence_count = len(sentences)

	    summary_card = (
	        f"<div style='display:flex; align-items:center; gap:1rem; "
	        f"margin-bottom:1.25rem; padding:1rem 1.25rem; "
	        f"background:#0f172a; border:1px solid #1e293b; border-radius:12px;'>"
	        f"<div style='font-size:0.78rem; letter-spacing:0.12em; "
	        f"text-transform:uppercase; color:#94a3b8;'>Overall assessment</div>"
	        f"<div style='font-size:1.15rem; font-weight:600; color:{label_color};'>"
	        f"{label}</div>"
	        f"<div style='margin-left:auto; color:#cbd5e1; font-variant-numeric:tabular-nums;'>"
	        f"Avg. AI likelihood: <b style='color:#f1f5f9;'>{mean_percent}%</b> "
	        f" ·  Sentences: <b style='color:#f1f5f9;'>{sentence_count}</b></div>"
	        f"</div>"
	    )

	    legend_row = (
	        "<div style='display:flex; gap:0.75rem; align-items:center; "
	        "margin-top:1.25rem; font-size:0.82rem; color:#94a3b8;'>"
	        "<span>Legend:</span>"
	        "<span style='background:rgba(56,189,248,0.28); padding:2px 10px; "
	        "border-radius:4px; border-bottom:2px solid rgba(56,189,248,0.95);'>Likely human</span>"
	        "<span style='background:rgba(245,191,66,0.28); padding:2px 10px; "
	        "border-radius:4px; border-bottom:2px solid rgba(245,191,66,0.95);'>Uncertain</span>"
	        "<span style='background:rgba(248,80,80,0.28); padding:2px 10px; "
	        "border-radius:4px; border-bottom:2px solid rgba(248,80,80,0.95);'>Likely AI</span>"
	        "</div>"
	    )

	    return (
	        f"<div style='font-family: -apple-system, BlinkMacSystemFont, "
	        f"\"Segoe UI\", Inter, sans-serif; color:#e2e8f0;'>"
	        f"{summary_card}"
	        f"<div style='padding:1.25rem 1.5rem; background:#0b1220; "
	        f"border:1px solid #1e293b; border-radius:12px; font-size:1rem; "
	        f"line-height:2.1;'>{highlighted}</div>"
	        f"{legend_row}"
	        f"</div>"
	    )


	def classify_overall(avg: float):
	    """Translate an average AI probability into ``(verdict, hex_colour)``.

	    Bands use exclusive upper bounds: below 0.25, below 0.5, below 0.75, and
	    everything else (including exactly 0.75 and above).
	    """
	    bands = (
	        (0.25, "Likely human-written", "#38bdf8"),
	        (0.5, "Leaning human", "#7dd3fc"),
	        (0.75, "Leaning AI", "#fbbf24"),
	    )
	    for upper_bound, verdict, colour in bands:
	        if avg < upper_bound:
	            return verdict, colour
	    return "Likely AI-generated", "#f87171"


	# ---------------------------------------------------------------------------
	# Main analyze function
	# ---------------------------------------------------------------------------
	def analyze(text: str):
	    """Run the full pipeline for the Analyze button and return result HTML.

	    Splits ``text`` into sentences, scores each one independently, and hands
	    both lists to ``render_heatmap``. Missing, blank, or unsplittable input
	    produces the empty-state placeholder.
	    """
	    has_content = bool(text and text.strip())
	    sentences = split_sentences(text) if has_content else []
	    # With no sentences the score list is empty too, which renders the
	    # placeholder without ever touching the model.
	    scores = [score_sentence(sentence) for sentence in sentences]
	    return render_heatmap(sentences, scores)


	# ---------------------------------------------------------------------------
	# UI
	# ---------------------------------------------------------------------------
	# Stylesheet handed to gr.Blocks(css=...) below. It applies a dark palette to
	# the page, header card, textarea and buttons, hides the Gradio footer, and
	# forces light text inside rendered HTML output (including the `.awa-sent`
	# sentence spans emitted by render_heatmap).
	CUSTOM_CSS = """
	:root, .gradio-container, body {
	background: #060912 !important;
	color: #e2e8f0 !important;
	font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Inter, system-ui, sans-serif !important;
	}
	.gradio-container {
	max-width: 960px !important;
	margin: 0 auto !important;
	padding-top: 2.5rem !important;
	}
	#app-header {
	text-align: left;
	margin-bottom: 1.75rem;
	padding: 1.75rem 2rem;
	background: linear-gradient(135deg, #0f172a 0%, #111827 100%);
	border: 1px solid #1e293b;
	border-radius: 16px;
	}
	#app-header h1 {
	margin: 0 0 0.5rem 0;
	font-size: 1.9rem;
	font-weight: 700;
	letter-spacing: -0.02em;
	background: linear-gradient(90deg, #38bdf8 0%, #a78bfa 100%);
	-webkit-background-clip: text;
	background-clip: text;
	color: transparent;
	}
	#app-header p {
	margin: 0;
	color: #94a3b8;
	font-size: 0.98rem;
	line-height: 1.55;
	max-width: 62ch;
	}
	textarea {
	background: #0b1220 !important;
	border: 1px solid #1e293b !important;
	color: #e2e8f0 !important;
	border-radius: 12px !important;
	font-size: 0.98rem !important;
	line-height: 1.6 !important;
	}
	textarea:focus {
	border-color: #38bdf8 !important;
	box-shadow: 0 0 0 3px rgba(56,189,248,0.15) !important;
	}
	label span {
	color: #cbd5e1 !important;
	font-weight: 500 !important;
	}
	button.primary, .primary button {
	background: linear-gradient(135deg, #38bdf8 0%, #6366f1 100%) !important;
	border: none !important;
	color: #0b1220 !important;
	font-weight: 600 !important;
	border-radius: 10px !important;
	}
	button.secondary, .secondary button {
	background: #1e293b !important;
	border: 1px solid #334155 !important;
	color: #e2e8f0 !important;
	border-radius: 10px !important;
	}
	footer { display: none !important; }

	/* Force light text inside our custom HTML output — Gradio 6's prose styles
	otherwise darken anything rendered inside gr.HTML. */
	.gradio-container .prose,
	.gradio-container .prose * ,
	.gradio-container .html-container,
	.gradio-container .html-container * {
	color: #e2e8f0 !important;
	}
	.gradio-container .awa-sent,
	.gradio-container .awa-sent * {
	color: #f8fafc !important;
	}
	"""

	# Static banner rendered at the top of the page via gr.HTML. The
	# `app-header` id is the hook the #app-header rules in CUSTOM_CSS target.
	HEADER_HTML = """
	<div id="app-header">
	<h1>AI Writing Analyzer</h1>
	<p>A classroom tool for examining student writing sentence by sentence. Paste a
	passage below and this tool will highlight each sentence with a color-coded
	heat map showing how likely it is to have been generated by an AI model.
	Use it as a starting point for conversation — not as a verdict.</p>
	</div>
	"""

	# Sample passage pre-filled into the input textbox (used as its initial
	# `value`), so the app has something to analyze on first load.
	EXAMPLE_TEXT = (
	"The old lighthouse had stood on that cliff for nearly two centuries, "
	"its white paint worn thin by salt and wind. Every evening, Marta climbed "
	"the spiral stairs with a cup of tea balanced in one hand. "
	"In conclusion, lighthouses serve as vital navigational aids that have "
	"played a crucial role in maritime safety throughout history. "
	"Furthermore, they represent an important cultural and architectural heritage "
	"that must be preserved for future generations."
	)

	# Page layout, top to bottom: header banner, input textbox, button row,
	# results panel. Components appear in the order they are created here.
	# NOTE(review): the CSS above mentions Gradio 6; newer Gradio releases may
	# expect `css`/`theme` on launch() rather than on gr.Blocks() — confirm
	# against the Gradio version this Space pins before changing anything.
	with gr.Blocks(
	    title="AI Writing Analyzer",
	    theme=gr.themes.Base(),
	    css=CUSTOM_CSS,
	) as demo:
	    gr.HTML(HEADER_HTML)

	    with gr.Row():
	        input_box = gr.Textbox(
	            value=EXAMPLE_TEXT,
	            label="Student writing",
	            placeholder="Paste a passage of writing here…",
	            lines=10,
	        )

	    with gr.Row():
	        analyze_btn = gr.Button("Analyze", variant="primary")
	        clear_btn = gr.Button("Clear", variant="secondary")

	    # Start on the empty-state placeholder until the first analysis runs.
	    output = gr.HTML(value=render_heatmap([], []))

	    # Analyze: textbox contents -> heat-map HTML.
	    analyze_btn.click(fn=analyze, inputs=input_box, outputs=output)
	    # Clear: blank the textbox and restore the placeholder in one step.
	    clear_btn.click(
	        fn=lambda: ("", render_heatmap([], [])),
	        inputs=None,
	        outputs=[input_box, output],
	    )

	if __name__ == "__main__":
	    demo.launch()