# Source: Hugging Face Space by profplate — commit "Update app.py" (bd3af38, verified).
"""
AI Writing Analyzer — sentence-level heat map for human vs. AI-generated text.
Built for classroom use. Loads a RoBERTa-based ChatGPT detector from
Hugging Face and runs it on each sentence independently, then renders the
input text with per-sentence color coding indicating the probability that
the sentence was AI-generated.
Runs comfortably on the free CPU tier.
"""
import re
import html
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# Hello-SimpleAI's RoBERTa detector — small, CPU-friendly, widely used.
# Hugging Face Hub id of the detector checkpoint used for every score.
MODEL_NAME = "Hello-SimpleAI/chatgpt-detector-roberta"
# The tokenizer and model are created once, at import time, and kept as
# module-level globals that score_sentence() reads on every call.
print(f"Loading model: {MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
# Switch the network to evaluation mode; this app only ever runs inference.
model.eval()
# The model's label order: index 0 = Human, index 1 = ChatGPT/AI.
# (Confirmed from the model card's id2label.)
# NOTE(review): hard-coded — re-check against model.config.id2label if
# MODEL_NAME is ever changed.
AI_INDEX = 1
# ---------------------------------------------------------------------------
# Sentence splitting
# ---------------------------------------------------------------------------
# A sentence boundary is a run of whitespace that
#   (a) follows terminal punctuation, optionally followed by ONE closing
#       quote or bracket (so `He said "Stop." Then ...` splits after `."`), and
#   (b) precedes something that looks like a sentence start: a capital
#       letter or an opening quote/bracket.
# Python look-behinds must be fixed-width, so the "punctuation" and
# "punctuation + closer" cases are two separate look-behinds joined by `|`.
_SENT_SPLIT_RE = re.compile(
    r"(?:(?<=[.!?])|(?<=[.!?][\"'\u201d\u2019)\]]))"
    r"\s+"
    r"(?=[A-Z\"'\u201c\u2018(\[])"
)


def split_sentences(text: str):
    """Lightweight sentence splitter — no NLTK download needed on free CPU.

    The text is first cut on blank lines (paragraph breaks) and each
    paragraph is then cut at sentence boundaries matched by
    ``_SENT_SPLIT_RE``. Paragraph membership is not retained in the result.

    Args:
        text: Raw user input. ``None``, empty, or whitespace-only input is
            accepted and yields an empty list.

    Returns:
        A flat list of non-empty, whitespace-stripped sentence strings in
        their original order.
    """
    if not text or not text.strip():
        return []

    sentences = []
    # Blank lines are hard boundaries even when the preceding line lacks
    # terminal punctuation (headings, list items, ...).
    for para in re.split(r"\n\s*\n", text.strip()):
        for piece in _SENT_SPLIT_RE.split(para):
            piece = piece.strip()
            if piece:
                sentences.append(piece)
    return sentences
# ---------------------------------------------------------------------------
# Scoring
# ---------------------------------------------------------------------------
def score_sentence(sentence: str) -> float:
    """Return probability that `sentence` is AI-generated (0.0 – 1.0).

    The sentence is tokenized on its own (truncated to at most 512 tokens),
    passed through the module-level classifier, and the softmax probability
    at ``AI_INDEX`` is returned as a plain Python float.
    """
    encoded = tokenizer(
        sentence,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        class_probs = model(**encoded).logits.softmax(dim=-1)
    # Row 0 is the only row — exactly one sentence was encoded.
    ai_prob = class_probs[0][AI_INDEX]
    return float(ai_prob.item())
# ---------------------------------------------------------------------------
# Rendering
# ---------------------------------------------------------------------------
# Colour stops of the heat map as (r, g, b): p = 0.0, p = 0.5 and p = 1.0.
_HEAT_STOPS = ((56, 189, 248), (245, 191, 66), (248, 80, 80))


def _heat_rgb(p: float) -> tuple:
    """Interpolate the heat-map colour for probability `p` as an (r, g, b) tuple.

    `p` is clamped to 0..1 first so that a slightly out-of-range value can
    never produce a channel outside 0..255. Interpolation is linear within
    each half of the range and channels are truncated with ``int()``.
    """
    p = min(1.0, max(0.0, p))
    if p < 0.5:
        start, end = _HEAT_STOPS[0], _HEAT_STOPS[1]
        t = p / 0.5
    else:
        start, end = _HEAT_STOPS[1], _HEAT_STOPS[2]
        t = (p - 0.5) / 0.5
    return tuple(int(a + (b - a) * t) for a, b in zip(start, end))


def prob_to_color(p: float) -> str:
    """
    Map probability 0..1 to a translucent background color.
    Low (human) -> cool blue
    Mid -> amber
    High (AI) -> warm red

    Returns a CSS ``rgba(...)`` string with alpha 0.42. Values of `p`
    outside 0..1 are clamped.
    """
    r, g, b = _heat_rgb(p)
    # Higher opacity for a vivid highlight; text is forced light on top.
    return f"rgba({r}, {g}, {b}, 0.42)"


def border_color(p: float) -> str:
    """Return the near-opaque (alpha 0.95) variant of the heat-map color for `p`.

    Uses the same color ramp and clamping as `prob_to_color`, so the
    underline always matches the hue of the background it sits under.
    """
    r, g, b = _heat_rgb(p)
    return f"rgba({r}, {g}, {b}, 0.95)"
def render_heatmap(sentences, scores) -> str:
    """Render scored sentences as a self-contained HTML heat map.

    Args:
        sentences: Iterable of sentence strings (HTML-escaped here).
        scores: Iterable of AI probabilities, paired with `sentences`
            by position.

    Returns:
        An HTML string. When there is nothing to pair up (either input is
        empty) a short placeholder prompt is returned; otherwise a summary
        bar, the highlighted sentences, and a legend.
    """
    # Pair once, up front, so the highlights, the average and the sentence
    # count are all derived from the same rows. This also means an empty
    # `scores` can no longer reach the division below.
    rows = list(zip(sentences, scores))
    if not rows:
        return (
            "<div style='color:#94a3b8; font-style:italic; padding:1rem;'>"
            "Paste some writing above and click <b>Analyze</b> to see a "
            "sentence-by-sentence breakdown.</div>"
        )

    pieces = []
    for sent, p in rows:
        bg = prob_to_color(p)
        bd = border_color(p)
        pct = int(round(p * 100))
        # User text is untrusted: escape before embedding in markup.
        safe = html.escape(sent)
        pieces.append(
            f"<span class='awa-sent' title='AI likelihood: {pct}%' "
            f"style='background:{bg} !important; "
            f"border-bottom:2px solid {bd} !important; "
            f"color:#f8fafc !important; "
            f"text-shadow:0 1px 2px rgba(0,0,0,0.65); "
            f"padding:3px 6px; margin:2px 1px; border-radius:5px; "
            f"box-decoration-break:clone; -webkit-box-decoration-break:clone; "
            f"line-height:2.3;'>{safe} "
            f"<span style='font-size:0.72em; color:#f1f5f9 !important; "
            f"font-weight:600; vertical-align:super; "
            f"text-shadow:0 1px 2px rgba(0,0,0,0.7);'>{pct}%</span></span>"
        )
    body = " ".join(pieces)

    avg = sum(p for _, p in rows) / len(rows)
    verdict, vcolor = classify_overall(avg)
    summary = (
        f"<div style='display:flex; align-items:center; gap:1rem; "
        f"margin-bottom:1.25rem; padding:1rem 1.25rem; "
        f"background:#0f172a; border:1px solid #1e293b; border-radius:12px;'>"
        f"<div style='font-size:0.78rem; letter-spacing:0.12em; "
        f"text-transform:uppercase; color:#94a3b8;'>Overall assessment</div>"
        # `!important` is required here: the app stylesheet forces a text
        # colour with `!important` on everything inside gr.HTML, and only an
        # inline `!important` declaration outranks that, so without it the
        # verdict would lose its colour coding.
        f"<div style='font-size:1.15rem; font-weight:600; color:{vcolor} !important;'>"
        f"{verdict}</div>"
        f"<div style='margin-left:auto; color:#cbd5e1; font-variant-numeric:tabular-nums;'>"
        f"Avg. AI likelihood: <b style='color:#f1f5f9;'>{int(round(avg*100))}%</b> "
        f"&nbsp;·&nbsp; Sentences: <b style='color:#f1f5f9;'>{len(rows)}</b></div>"
        f"</div>"
    )
    legend = (
        "<div style='display:flex; gap:0.75rem; align-items:center; "
        "margin-top:1.25rem; font-size:0.82rem; color:#94a3b8;'>"
        "<span>Legend:</span>"
        "<span style='background:rgba(56,189,248,0.28); padding:2px 10px; "
        "border-radius:4px; border-bottom:2px solid rgba(56,189,248,0.95);'>Likely human</span>"
        "<span style='background:rgba(245,191,66,0.28); padding:2px 10px; "
        "border-radius:4px; border-bottom:2px solid rgba(245,191,66,0.95);'>Uncertain</span>"
        "<span style='background:rgba(248,80,80,0.28); padding:2px 10px; "
        "border-radius:4px; border-bottom:2px solid rgba(248,80,80,0.95);'>Likely AI</span>"
        "</div>"
    )
    return (
        f"<div style='font-family: -apple-system, BlinkMacSystemFont, "
        f"\"Segoe UI\", Inter, sans-serif; color:#e2e8f0;'>"
        f"{summary}"
        f"<div style='padding:1.25rem 1.5rem; background:#0b1220; "
        f"border:1px solid #1e293b; border-radius:12px; font-size:1rem; "
        f"line-height:2.1;'>{body}</div>"
        f"{legend}"
        f"</div>"
    )
def classify_overall(avg: float):
    """Translate an average AI probability into a (verdict, hex color) pair.

    Each band is half-open: a value equal to a threshold falls into the
    band above it. Anything at or beyond 0.75 is reported as AI-generated.
    """
    # (exclusive upper bound, verdict label, label color)
    bands = (
        (0.25, "Likely human-written", "#38bdf8"),
        (0.5, "Leaning human", "#7dd3fc"),
        (0.75, "Leaning AI", "#fbbf24"),
    )
    for upper_bound, label, hex_color in bands:
        if avg < upper_bound:
            return label, hex_color
    return "Likely AI-generated", "#f87171"
# ---------------------------------------------------------------------------
# Main analyze function
# ---------------------------------------------------------------------------
def analyze(text: str):
    """Gradio callback: split `text` into sentences, score each, return HTML.

    Missing, empty, or whitespace-only input — or input that yields no
    sentences — produces the empty-state rendering instead of a heat map.
    """
    has_content = bool(text) and bool(text.strip())
    sentences = split_sentences(text) if has_content else []
    # With no sentences this is an empty list, which render_heatmap treats
    # as "nothing to show yet".
    scores = [score_sentence(sentence) for sentence in sentences]
    return render_heatmap(sentences, scores)
# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
# Dark-theme stylesheet injected into the Gradio page.
#
# The last two rules force a light text colour, with `!important`, on every
# element rendered inside gr.HTML. Any more specific colour that must survive
# that blanket rule therefore needs `!important` as well: the header title's
# `color: transparent` (which is what lets the gradient show through the
# text) and the header paragraph's muted colour are marked accordingly. Both
# use an ID selector, so they outrank the class-based blanket rule.
CUSTOM_CSS = """
:root, .gradio-container, body {
background: #060912 !important;
color: #e2e8f0 !important;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Inter, system-ui, sans-serif !important;
}
.gradio-container {
max-width: 960px !important;
margin: 0 auto !important;
padding-top: 2.5rem !important;
}
#app-header {
text-align: left;
margin-bottom: 1.75rem;
padding: 1.75rem 2rem;
background: linear-gradient(135deg, #0f172a 0%, #111827 100%);
border: 1px solid #1e293b;
border-radius: 16px;
}
#app-header h1 {
margin: 0 0 0.5rem 0;
font-size: 1.9rem;
font-weight: 700;
letter-spacing: -0.02em;
background: linear-gradient(90deg, #38bdf8 0%, #a78bfa 100%);
-webkit-background-clip: text;
background-clip: text;
color: transparent !important;
}
#app-header p {
margin: 0;
color: #94a3b8 !important;
font-size: 0.98rem;
line-height: 1.55;
max-width: 62ch;
}
textarea {
background: #0b1220 !important;
border: 1px solid #1e293b !important;
color: #e2e8f0 !important;
border-radius: 12px !important;
font-size: 0.98rem !important;
line-height: 1.6 !important;
}
textarea:focus {
border-color: #38bdf8 !important;
box-shadow: 0 0 0 3px rgba(56,189,248,0.15) !important;
}
label span {
color: #cbd5e1 !important;
font-weight: 500 !important;
}
button.primary, .primary button {
background: linear-gradient(135deg, #38bdf8 0%, #6366f1 100%) !important;
border: none !important;
color: #0b1220 !important;
font-weight: 600 !important;
border-radius: 10px !important;
}
button.secondary, .secondary button {
background: #1e293b !important;
border: 1px solid #334155 !important;
color: #e2e8f0 !important;
border-radius: 10px !important;
}
footer { display: none !important; }
/* Force light text inside our custom HTML output — Gradio 6's prose styles
otherwise darken anything rendered inside gr.HTML. */
.gradio-container .prose,
.gradio-container .prose * ,
.gradio-container .html-container,
.gradio-container .html-container * {
color: #e2e8f0 !important;
}
.gradio-container .awa-sent,
.gradio-container .awa-sent * {
color: #f8fafc !important;
}
"""
# Static banner markup shown at the top of the page via gr.HTML. The
# `app-header` id is the hook that CUSTOM_CSS uses to style the card, its
# title, and its paragraph.
HEADER_HTML = """
<div id="app-header">
<h1>AI Writing Analyzer</h1>
<p>A classroom tool for examining student writing sentence by sentence. Paste a
passage below and this tool will highlight each sentence with a color-coded
heat map showing how likely it is to have been generated by an AI model.
Use it as a starting point for conversation — not as a verdict.</p>
</div>
"""
# Sample passage pre-filled into the input box so the app can be tried
# immediately. It is a single paragraph built by implicit string
# concatenation; each fragment ends with a space so sentences stay separated.
# NOTE(review): presumably mixes narrative and formulaic expository sentences
# on purpose, to show contrasting scores — confirm with the author.
EXAMPLE_TEXT = (
    "The old lighthouse had stood on that cliff for nearly two centuries, "
    "its white paint worn thin by salt and wind. Every evening, Marta climbed "
    "the spiral stairs with a cup of tea balanced in one hand. "
    "In conclusion, lighthouses serve as vital navigational aids that have "
    "played a crucial role in maritime safety throughout history. "
    "Furthermore, they represent an important cultural and architectural heritage "
    "that must be preserved for future generations."
)
# Page layout and event wiring. `demo` is the module-level Blocks app that
# the entry point below launches.
# NOTE(review): a comment in CUSTOM_CSS refers to Gradio 6; the Gradio 6
# migration notes describe `css` and `theme` as moving from the Blocks
# constructor to launch() — confirm they are still honoured here on the
# installed version.
with gr.Blocks(css=CUSTOM_CSS, title="AI Writing Analyzer", theme=gr.themes.Base()) as demo:
    # Header banner.
    gr.HTML(HEADER_HTML)
    # Input area, pre-filled with the sample passage.
    with gr.Row():
        input_box = gr.Textbox(
            label="Student writing",
            placeholder="Paste a passage of writing here…",
            lines=10,
            value=EXAMPLE_TEXT,
        )
    # Action buttons.
    with gr.Row():
        analyze_btn = gr.Button("Analyze", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")
    # Result panel; starts out showing the empty-state prompt.
    output = gr.HTML(value=render_heatmap([], []))
    # Analyze: textbox contents -> heat-map HTML.
    analyze_btn.click(fn=analyze, inputs=input_box, outputs=output)
    # Clear: empty the textbox and restore the empty-state prompt.
    clear_btn.click(
        fn=lambda: ("", render_heatmap([], [])),
        inputs=None,
        outputs=[input_box, output],
    )
# Start the server only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()