"""
AI Writing Analyzer — sentence-level heat map for human vs. AI-generated text.
Built for classroom use. Loads a RoBERTa-based ChatGPT detector from
Hugging Face and runs it on each sentence independently, then renders the
input text with per-sentence color coding indicating the probability that
the sentence was AI-generated.
Runs comfortably on the free CPU tier.
"""
import re
import html
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# Hello-SimpleAI's RoBERTa detector — small, CPU-friendly, widely used.
MODEL_NAME = "Hello-SimpleAI/chatgpt-detector-roberta"
print(f"Loading model: {MODEL_NAME}")
# First call downloads weights from the Hugging Face Hub; later runs hit the
# local cache. Runs at import time, so startup blocks until the model loads.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
model.eval()  # inference-only mode (disables dropout etc.)
# The model's label order: index 0 = Human, index 1 = ChatGPT/AI.
# (Confirmed from the model card's id2label.)
AI_INDEX = 1
# ---------------------------------------------------------------------------
# Sentence splitting
# ---------------------------------------------------------------------------
# Split after ./!/? followed by whitespace, but only when the next run starts
# like a new sentence (capital letter, quote, or opening bracket).
_SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+(?=[A-Z\"'\(\[])")


def split_sentences(text: str):
    """Lightweight sentence splitter — no NLTK download needed on free CPU."""
    stripped = text.strip()
    if not stripped:
        return []
    # Paragraph breaks are handled first so structure is preserved, then each
    # paragraph is broken into sentences by the regex above.
    sentences = []
    for paragraph in re.split(r"\n\s*\n", stripped):
        paragraph = paragraph.strip()
        if not paragraph:
            continue
        sentences.extend(
            piece.strip()
            for piece in _SENT_SPLIT_RE.split(paragraph)
            if piece.strip()
        )
    return sentences
# ---------------------------------------------------------------------------
# Scoring
# ---------------------------------------------------------------------------
@torch.no_grad()
def score_sentence(sentence: str) -> float:
    """Return probability that `sentence` is AI-generated (0.0 – 1.0)."""
    # Truncate to the RoBERTa context limit; a single sentence rarely hits it.
    encoded = tokenizer(
        sentence,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    output = model(**encoded)
    # Softmax over the two labels (0 = Human, 1 = AI), batch dim dropped.
    probabilities = torch.softmax(output.logits, dim=-1).squeeze(0)
    return float(probabilities[AI_INDEX].item())
# ---------------------------------------------------------------------------
# Rendering
# ---------------------------------------------------------------------------
def _heat_rgb(p: float):
    """Interpolate probability 0..1 across three RGB stops; return (r, g, b).

    Stops: teal (56,189,248) at 0.0 → amber (245,191,66) at 0.5 →
    red (248,80,80) at 1.0. Shared by prob_to_color and border_color,
    which previously duplicated this interpolation verbatim.
    """
    if p < 0.5:
        t = p / 0.5
        start, end = (56, 189, 248), (245, 191, 66)
    else:
        t = (p - 0.5) / 0.5
        start, end = (245, 191, 66), (248, 80, 80)
    # int() truncation (not rounding) matches the original behavior exactly.
    return tuple(int(a + (b - a) * t) for a, b in zip(start, end))


def prob_to_color(p: float) -> str:
    """
    Map probability 0..1 to a background color.
        Low (human)  -> cool teal
        Mid          -> amber
        High (AI)    -> warm red
    """
    r, g, b = _heat_rgb(p)
    # Higher opacity for a vivid highlight; text is forced light on top.
    return f"rgba({r}, {g}, {b}, 0.42)"


def border_color(p: float) -> str:
    """Same color ramp as prob_to_color, but near-opaque for borders."""
    r, g, b = _heat_rgb(p)
    return f"rgba({r}, {g}, {b}, 0.95)"
def render_heatmap(sentences, scores) -> str:
    """Render `sentences` as HTML with per-sentence AI-probability colors.

    Returns a single HTML string: an overall-assessment summary card, the
    text with each sentence highlighted by prob_to_color/border_color and
    annotated with its percentage, and a three-chip legend.

    NOTE(review): the HTML tags in this function were lost to extraction
    garbling; the markup below is reconstructed from the surviving visible
    strings and the `.awa-sent` CSS hook in CUSTOM_CSS. Verify the exact
    layout against the deployed app.
    """
    if not sentences:
        # Empty-state placeholder shown before any analysis has run.
        return (
            "<div style='padding:1.25rem 1.5rem; border:1px dashed #334155; "
            "border-radius:12px; color:#94a3b8; text-align:center;'>"
            "Paste some writing above and click Analyze to see a "
            "sentence-by-sentence breakdown.</div>"
        )
    pieces = []
    for sent, p in zip(sentences, scores):
        bg = prob_to_color(p)
        bd = border_color(p)
        pct = int(round(p * 100))
        # Escape user text before embedding it in HTML.
        safe = html.escape(sent)
        pieces.append(
            f"<span class='awa-sent' style='background:{bg}; "
            f"border-bottom:2px solid {bd}; border-radius:4px; "
            f"padding:0.05rem 0.2rem;' title='AI likelihood: {pct}%'>{safe} "
            f"<sup style='font-size:0.72em; opacity:0.75;'>{pct}%</sup></span>"
        )
    body = " ".join(pieces)
    avg = sum(scores) / len(scores)
    verdict, vcolor = classify_overall(avg)
    summary = (
        f"<div style='margin-bottom:1rem; padding:1rem 1.25rem; "
        f"background:#0b1220; border:1px solid #1e293b; border-radius:12px;'>"
        f"<div style='font-size:0.8rem; letter-spacing:0.05em; "
        f"text-transform:uppercase; color:#64748b;'>Overall assessment</div>"
        f"<div style='font-size:1.15rem; font-weight:600; color:{vcolor};'>"
        f"{verdict}</div>"
        f"<div style='font-size:0.85rem; color:#94a3b8;'>"
        f"Avg. AI likelihood: {int(round(avg*100))}%"
        f" · Sentences: {len(sentences)}</div>"
        f"</div>"
    )
    legend = (
        "<div style='margin-top:1rem; display:flex; gap:0.75rem; "
        "align-items:center; font-size:0.8rem; color:#94a3b8;'>"
        "<span>Legend:</span>"
        "<span style='background:rgba(56, 189, 248, 0.42); "
        "padding:0.1rem 0.4rem; border-radius:4px;'>Likely human</span>"
        "<span style='background:rgba(245, 191, 66, 0.42); "
        "padding:0.1rem 0.4rem; border-radius:4px;'>Uncertain</span>"
        "<span style='background:rgba(248, 80, 80, 0.42); "
        "padding:0.1rem 0.4rem; border-radius:4px;'>Likely AI</span>"
        "</div>"
    )
    return (
        f"<div>"
        f"{summary}"
        f"<div style='line-height:2.1; font-size:1rem;'>{body}</div>"
        f"{legend}"
        f"</div>"
    )
def classify_overall(avg: float):
    """Map an average AI probability to a (verdict label, hex color) pair."""
    # Ordered upper bounds; the first bracket that contains `avg` wins.
    brackets = (
        (0.25, ("Likely human-written", "#38bdf8")),
        (0.5, ("Leaning human", "#7dd3fc")),
        (0.75, ("Leaning AI", "#fbbf24")),
    )
    for upper, verdict in brackets:
        if avg < upper:
            return verdict
    return "Likely AI-generated", "#f87171"
# ---------------------------------------------------------------------------
# Main analyze function
# ---------------------------------------------------------------------------
def analyze(text: str):
    """Score each sentence of `text` and return the rendered HTML heat map."""
    # Blank/whitespace-only input (and input the splitter can't use) gets the
    # empty-state placeholder rather than an error.
    if text and text.strip():
        sentences = split_sentences(text)
        if sentences:
            scores = [score_sentence(s) for s in sentences]
            return render_heatmap(sentences, scores)
    return render_heatmap([], [])
# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
# Dark-theme CSS injected into the Gradio app. The string content is runtime
# behavior (CSS) — do not reformat. Selectors of note: #app-header styles
# HEADER_HTML, and .awa-sent matches the sentence spans emitted by
# render_heatmap.
CUSTOM_CSS = """
:root, .gradio-container, body {
background: #060912 !important;
color: #e2e8f0 !important;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Inter, system-ui, sans-serif !important;
}
.gradio-container {
max-width: 960px !important;
margin: 0 auto !important;
padding-top: 2.5rem !important;
}
#app-header {
text-align: left;
margin-bottom: 1.75rem;
padding: 1.75rem 2rem;
background: linear-gradient(135deg, #0f172a 0%, #111827 100%);
border: 1px solid #1e293b;
border-radius: 16px;
}
#app-header h1 {
margin: 0 0 0.5rem 0;
font-size: 1.9rem;
font-weight: 700;
letter-spacing: -0.02em;
background: linear-gradient(90deg, #38bdf8 0%, #a78bfa 100%);
-webkit-background-clip: text;
background-clip: text;
color: transparent;
}
#app-header p {
margin: 0;
color: #94a3b8;
font-size: 0.98rem;
line-height: 1.55;
max-width: 62ch;
}
textarea {
background: #0b1220 !important;
border: 1px solid #1e293b !important;
color: #e2e8f0 !important;
border-radius: 12px !important;
font-size: 0.98rem !important;
line-height: 1.6 !important;
}
textarea:focus {
border-color: #38bdf8 !important;
box-shadow: 0 0 0 3px rgba(56,189,248,0.15) !important;
}
label span {
color: #cbd5e1 !important;
font-weight: 500 !important;
}
button.primary, .primary button {
background: linear-gradient(135deg, #38bdf8 0%, #6366f1 100%) !important;
border: none !important;
color: #0b1220 !important;
font-weight: 600 !important;
border-radius: 10px !important;
}
button.secondary, .secondary button {
background: #1e293b !important;
border: 1px solid #334155 !important;
color: #e2e8f0 !important;
border-radius: 10px !important;
}
footer { display: none !important; }
/* Force light text inside our custom HTML output — Gradio 6's prose styles
otherwise darken anything rendered inside gr.HTML. */
.gradio-container .prose,
.gradio-container .prose * ,
.gradio-container .html-container,
.gradio-container .html-container * {
color: #e2e8f0 !important;
}
.gradio-container .awa-sent,
.gradio-container .awa-sent * {
color: #f8fafc !important;
}
"""
# Header card rendered at the top of the app; styled by the #app-header
# rules in CUSTOM_CSS.
# NOTE(review): the original markup here was lost to extraction garbling;
# reconstructed to match the CSS selectors (#app-header, h1, p). Verify the
# exact wording against the deployed app.
HEADER_HTML = """
<div id="app-header">
  <h1>AI Writing Analyzer</h1>
  <p>Paste a passage of writing to see a sentence-by-sentence heat map of how
  likely each sentence is to be AI-generated. Built for classroom discussion —
  a teaching aid, not a definitive detector.</p>
</div>
"""
# Pre-filled demo passage. Deliberately contrasting: the first two sentences
# are concrete narrative, while the last two use formulaic "In conclusion…/
# Furthermore…" boilerplate — so the heat map shows a visible human-vs-AI
# gradient on first click.
EXAMPLE_TEXT = (
    "The old lighthouse had stood on that cliff for nearly two centuries, "
    "its white paint worn thin by salt and wind. Every evening, Marta climbed "
    "the spiral stairs with a cup of tea balanced in one hand. "
    "In conclusion, lighthouses serve as vital navigational aids that have "
    "played a crucial role in maritime safety throughout history. "
    "Furthermore, they represent an important cultural and architectural heritage "
    "that must be preserved for future generations."
)
# Gradio UI: header, input textbox, Analyze/Clear buttons, HTML heat map.
with gr.Blocks(css=CUSTOM_CSS, title="AI Writing Analyzer", theme=gr.themes.Base()) as demo:
    gr.HTML(HEADER_HTML)
    with gr.Row():
        # Pre-filled with EXAMPLE_TEXT so the demo works with a single click.
        input_box = gr.Textbox(
            label="Student writing",
            placeholder="Paste a passage of writing here…",
            lines=10,
            value=EXAMPLE_TEXT,
        )
    with gr.Row():
        analyze_btn = gr.Button("Analyze", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")
    # Initial value is the empty-state placeholder from render_heatmap.
    output = gr.HTML(value=render_heatmap([], []))
    analyze_btn.click(fn=analyze, inputs=input_box, outputs=output)
    # Clear resets both the textbox and the heat-map panel.
    clear_btn.click(
        fn=lambda: ("", render_heatmap([], [])),
        inputs=None,
        outputs=[input_box, output],
    )
if __name__ == "__main__":
    demo.launch()