Spaces:
Paused
Paused
| """ | |
| AI Writing Analyzer — sentence-level heat map for human vs. AI-generated text. | |
| Built for classroom use. Loads a RoBERTa-based ChatGPT detector from | |
| Hugging Face and runs it on each sentence independently, then renders the | |
| input text with per-sentence color coding indicating the probability that | |
| the sentence was AI-generated. | |
| Runs comfortably on the free CPU tier. | |
| """ | |
| import re | |
| import html | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| # --------------------------------------------------------------------------- | |
| # Model | |
| # --------------------------------------------------------------------------- | |
# Hello-SimpleAI's RoBERTa detector — small, CPU-friendly, widely used.
MODEL_NAME = "Hello-SimpleAI/chatgpt-detector-roberta"

# Load the tokenizer and classifier once, at import time; score_sentence()
# reuses these module-level objects on every call.
print(f"Loading model: {MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
# The app only runs inference, so put the module in evaluation mode.
model.eval()

# The model's label order: index 0 = Human, index 1 = ChatGPT/AI.
# (Confirmed from the model card's id2label.)
# score_sentence() reads the softmax output at this index.
AI_INDEX = 1
| # --------------------------------------------------------------------------- | |
| # Sentence splitting | |
| # --------------------------------------------------------------------------- | |
# A sentence boundary is a run of whitespace that
#   * follows terminal punctuation (. ! ?), optionally closed by a quote or
#     bracket -- e.g. `"stop." Then` or `(mostly.) Next` -- and
#   * precedes something that looks like a sentence start: a capital letter,
#     an opening quote (straight or curly), or an opening bracket.
# Two lookbehinds are alternated because `re` requires every lookbehind to be
# fixed-width.  \u201c/\u201d and \u2018/\u2019 are the curly double/single
# quotes produced by word processors.
_SENT_SPLIT_RE = re.compile(
    r"(?:(?<=[.!?])|(?<=[.!?][\"'\u201d\u2019)\]]))\s+(?=[A-Z\"'\u201c\u2018(\[])"
)


def split_sentences(text: str) -> list:
    """Split ``text`` into a flat list of sentences.

    Lightweight, regex-based splitter — no NLTK download needed on free CPU.
    Blank lines are treated as hard boundaries (a sentence never spans two
    paragraphs), but the paragraphs themselves are not kept: the result is a
    single flat list in reading order.

    This is a heuristic: abbreviations followed by a capitalised word
    (e.g. "Dr. Smith") are still split.

    Args:
        text: Raw input text; may be empty or whitespace-only.

    Returns:
        The non-empty sentences with surrounding whitespace stripped, or an
        empty list when ``text`` contains no visible characters.
    """
    text = text.strip()
    if not text:
        return []

    sentences = []
    # Paragraph breaks first, then sentence boundaries inside each paragraph.
    for para in re.split(r"\n\s*\n", text):
        for part in _SENT_SPLIT_RE.split(para.strip()):
            part = part.strip()
            if part:
                sentences.append(part)
    return sentences
| # --------------------------------------------------------------------------- | |
| # Scoring | |
| # --------------------------------------------------------------------------- | |
def score_sentence(sentence: str) -> float:
    """Return the probability that ``sentence`` is AI-generated (0.0 – 1.0).

    The sentence is tokenized on its own (truncated to at most 512 tokens),
    run through the module-level ``model``, and the softmax probability at
    ``AI_INDEX`` is returned as a plain Python float.
    """
    inputs = tokenizer(
        sentence,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    # Inference only: without no_grad() every call would record an autograd
    # graph that is never used, costing memory and time on the CPU tier.
    with torch.no_grad():
        logits = model(**inputs).logits
        # Single-sentence batch, so row 0 holds the class probabilities.
        probs = torch.softmax(logits, dim=-1)[0]
    return float(probs[AI_INDEX].item())
| # --------------------------------------------------------------------------- | |
| # Rendering | |
| # --------------------------------------------------------------------------- | |
# Color stops (R, G, B) of the heat-map gradient.
_HUMAN_RGB = (56, 189, 248)  # p = 0.0 -> cool teal
_MID_RGB = (245, 191, 66)    # p = 0.5 -> amber
_AI_RGB = (248, 80, 80)      # p = 1.0 -> warm red


def _heat_rgb(p: float) -> tuple:
    """Interpolate the three-stop gradient at ``p`` and return ``(r, g, b)`` ints."""
    # Clamp so an out-of-range probability can never yield an invalid channel
    # value (e.g. a negative green component).
    p = min(1.0, max(0.0, p))
    if p < 0.5:
        start, end, t = _HUMAN_RGB, _MID_RGB, p / 0.5
    else:
        start, end, t = _MID_RGB, _AI_RGB, (p - 0.5) / 0.5
    # int() truncates toward zero, matching the established color values.
    return tuple(int(a + (b - a) * t) for a, b in zip(start, end))


def prob_to_color(p: float) -> str:
    """
    Map probability 0..1 to a translucent background color.

    Low (human) -> cool teal
    Mid         -> amber
    High (AI)   -> warm red

    Values outside 0..1 are clamped to the nearest end of the gradient.
    Returns a CSS ``rgba(...)`` string.
    """
    r, g, b = _heat_rgb(p)
    # Higher opacity for a vivid highlight; text is forced light on top.
    return f"rgba({r}, {g}, {b}, 0.42)"


def border_color(p: float) -> str:
    """Return the near-opaque variant of ``prob_to_color(p)`` used for the underline.

    Same gradient and clamping as ``prob_to_color``; only the alpha differs.
    """
    r, g, b = _heat_rgb(p)
    return f"rgba({r}, {g}, {b}, 0.95)"
def _render_sentence_span(sent: str, p: float) -> str:
    """Return the highlighted ``<span>`` for one sentence with AI probability ``p``."""
    bg = prob_to_color(p)
    bd = border_color(p)
    pct = int(round(p * 100))
    # Escape the user's text so pasted markup is displayed, never interpreted.
    safe = html.escape(sent)
    return (
        f"<span class='awa-sent' title='AI likelihood: {pct}%' "
        f"style='background:{bg} !important; "
        f"border-bottom:2px solid {bd} !important; "
        f"color:#f8fafc !important; "
        f"text-shadow:0 1px 2px rgba(0,0,0,0.65); "
        f"padding:3px 6px; margin:2px 1px; border-radius:5px; "
        f"box-decoration-break:clone; -webkit-box-decoration-break:clone; "
        f"line-height:2.3;'>{safe} "
        f"<span style='font-size:0.72em; color:#f1f5f9 !important; "
        f"font-weight:600; vertical-align:super; "
        f"text-shadow:0 1px 2px rgba(0,0,0,0.7);'>{pct}%</span></span>"
    )


def render_heatmap(sentences, scores) -> str:
    """Render the analysis result as an HTML fragment.

    Args:
        sentences: Sentences in reading order.
        scores: AI probability (0.0 – 1.0) for each sentence, same order and
            same length as ``sentences``.

    Returns:
        For empty ``sentences``, a short placeholder prompt. Otherwise a
        fragment made of a summary bar (overall verdict, average likelihood,
        sentence count), the color-coded sentences, and a legend.

    Raises:
        ValueError: If ``sentences`` is non-empty and ``scores`` has a
            different length. Previously the extra items were silently
            dropped (or an empty ``scores`` caused ``ZeroDivisionError``).
    """
    if not sentences:
        return (
            "<div style='color:#94a3b8; font-style:italic; padding:1rem;'>"
            "Paste some writing above and click <b>Analyze</b> to see a "
            "sentence-by-sentence breakdown.</div>"
        )
    if len(sentences) != len(scores):
        raise ValueError(
            f"sentences and scores must have the same length "
            f"(got {len(sentences)} and {len(scores)})"
        )

    body = " ".join(
        _render_sentence_span(sent, p) for sent, p in zip(sentences, scores)
    )

    # The overall verdict is driven by the unweighted mean of sentence scores.
    avg = sum(scores) / len(scores)
    verdict, vcolor = classify_overall(avg)
    summary = (
        f"<div style='display:flex; align-items:center; gap:1rem; "
        f"margin-bottom:1.25rem; padding:1rem 1.25rem; "
        f"background:#0f172a; border:1px solid #1e293b; border-radius:12px;'>"
        f"<div style='font-size:0.78rem; letter-spacing:0.12em; "
        f"text-transform:uppercase; color:#94a3b8;'>Overall assessment</div>"
        f"<div style='font-size:1.15rem; font-weight:600; color:{vcolor};'>"
        f"{verdict}</div>"
        f"<div style='margin-left:auto; color:#cbd5e1; font-variant-numeric:tabular-nums;'>"
        f"Avg. AI likelihood: <b style='color:#f1f5f9;'>{int(round(avg*100))}%</b> "
        f" · Sentences: <b style='color:#f1f5f9;'>{len(sentences)}</b></div>"
        f"</div>"
    )
    # Static legend: one swatch per gradient stop (0%, 50%, 100%).
    legend = (
        "<div style='display:flex; gap:0.75rem; align-items:center; "
        "margin-top:1.25rem; font-size:0.82rem; color:#94a3b8;'>"
        "<span>Legend:</span>"
        "<span style='background:rgba(56,189,248,0.28); padding:2px 10px; "
        "border-radius:4px; border-bottom:2px solid rgba(56,189,248,0.95);'>Likely human</span>"
        "<span style='background:rgba(245,191,66,0.28); padding:2px 10px; "
        "border-radius:4px; border-bottom:2px solid rgba(245,191,66,0.95);'>Uncertain</span>"
        "<span style='background:rgba(248,80,80,0.28); padding:2px 10px; "
        "border-radius:4px; border-bottom:2px solid rgba(248,80,80,0.95);'>Likely AI</span>"
        "</div>"
    )
    return (
        f"<div style='font-family: -apple-system, BlinkMacSystemFont, "
        f"\"Segoe UI\", Inter, sans-serif; color:#e2e8f0;'>"
        f"{summary}"
        f"<div style='padding:1.25rem 1.5rem; background:#0b1220; "
        f"border:1px solid #1e293b; border-radius:12px; font-size:1rem; "
        f"line-height:2.1;'>{body}</div>"
        f"{legend}"
        f"</div>"
    )
def classify_overall(avg: float):
    """Translate an average AI probability into ``(verdict, hex_color)``.

    Bands are half-open on the upper side: below 0.25, below 0.5, below 0.75,
    and everything else (including exactly 0.75 and above).
    """
    bands = (
        (0.25, "Likely human-written", "#38bdf8"),
        (0.5, "Leaning human", "#7dd3fc"),
        (0.75, "Leaning AI", "#fbbf24"),
    )
    for upper, label, colour in bands:
        if avg < upper:
            return label, colour
    # Fell through every band: the highest-likelihood verdict.
    return "Likely AI-generated", "#f87171"
| # --------------------------------------------------------------------------- | |
| # Main analyze function | |
| # --------------------------------------------------------------------------- | |
def analyze(text: str):
    """Score every sentence of ``text`` and return the heat-map HTML.

    Missing, empty, or whitespace-only input (or input that yields no
    sentences) produces the placeholder rendering instead.
    """
    # Guard against None/"" before splitting; whitespace-only text simply
    # splits into an empty list.
    found = split_sentences(text) if text else []
    if found:
        likelihoods = [score_sentence(item) for item in found]
        return render_heatmap(found, likelihoods)
    return render_heatmap([], [])
| # --------------------------------------------------------------------------- | |
| # UI | |
| # --------------------------------------------------------------------------- | |
# Stylesheet handed to gr.Blocks(css=...). It sets the dark palette for the
# page, header, textarea and buttons, hides the footer, and forces light text
# inside rendered HTML output (see the comment near the end of the sheet).
# The `.awa-sent` selector targets the sentence spans emitted by
# render_heatmap().
CUSTOM_CSS = """
:root, .gradio-container, body {
background: #060912 !important;
color: #e2e8f0 !important;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Inter, system-ui, sans-serif !important;
}
.gradio-container {
max-width: 960px !important;
margin: 0 auto !important;
padding-top: 2.5rem !important;
}
#app-header {
text-align: left;
margin-bottom: 1.75rem;
padding: 1.75rem 2rem;
background: linear-gradient(135deg, #0f172a 0%, #111827 100%);
border: 1px solid #1e293b;
border-radius: 16px;
}
#app-header h1 {
margin: 0 0 0.5rem 0;
font-size: 1.9rem;
font-weight: 700;
letter-spacing: -0.02em;
background: linear-gradient(90deg, #38bdf8 0%, #a78bfa 100%);
-webkit-background-clip: text;
background-clip: text;
color: transparent;
}
#app-header p {
margin: 0;
color: #94a3b8;
font-size: 0.98rem;
line-height: 1.55;
max-width: 62ch;
}
textarea {
background: #0b1220 !important;
border: 1px solid #1e293b !important;
color: #e2e8f0 !important;
border-radius: 12px !important;
font-size: 0.98rem !important;
line-height: 1.6 !important;
}
textarea:focus {
border-color: #38bdf8 !important;
box-shadow: 0 0 0 3px rgba(56,189,248,0.15) !important;
}
label span {
color: #cbd5e1 !important;
font-weight: 500 !important;
}
button.primary, .primary button {
background: linear-gradient(135deg, #38bdf8 0%, #6366f1 100%) !important;
border: none !important;
color: #0b1220 !important;
font-weight: 600 !important;
border-radius: 10px !important;
}
button.secondary, .secondary button {
background: #1e293b !important;
border: 1px solid #334155 !important;
color: #e2e8f0 !important;
border-radius: 10px !important;
}
footer { display: none !important; }
/* Force light text inside our custom HTML output — Gradio 6's prose styles
otherwise darken anything rendered inside gr.HTML. */
.gradio-container .prose,
.gradio-container .prose * ,
.gradio-container .html-container,
.gradio-container .html-container * {
color: #e2e8f0 !important;
}
.gradio-container .awa-sent,
.gradio-container .awa-sent * {
color: #f8fafc !important;
}
"""
# Static banner shown at the top of the page; styled by the `#app-header`
# rules in CUSTOM_CSS.
HEADER_HTML = """
<div id="app-header">
<h1>AI Writing Analyzer</h1>
<p>A classroom tool for examining student writing sentence by sentence. Paste a
passage below and this tool will highlight each sentence with a color-coded
heat map showing how likely it is to have been generated by an AI model.
Use it as a starting point for conversation — not as a verdict.</p>
</div>
"""

# Sample passage pre-filled into the input box so the app can be tried
# without pasting anything.
EXAMPLE_TEXT = (
    "The old lighthouse had stood on that cliff for nearly two centuries, "
    "its white paint worn thin by salt and wind. Every evening, Marta climbed "
    "the spiral stairs with a cup of tea balanced in one hand. "
    "In conclusion, lighthouses serve as vital navigational aids that have "
    "played a crucial role in maritime safety throughout history. "
    "Furthermore, they represent an important cultural and architectural heritage "
    "that must be preserved for future generations."
)
# Page layout: header, input textbox, Analyze/Clear buttons, heat-map output.
# NOTE(review): the stylesheet comment mentions Gradio 6 — confirm that the
# installed Gradio version still accepts `css`/`theme` on gr.Blocks (newer
# releases may expect them on launch()).
with gr.Blocks(css=CUSTOM_CSS, title="AI Writing Analyzer", theme=gr.themes.Base()) as demo:
    gr.HTML(HEADER_HTML)
    with gr.Row():
        input_box = gr.Textbox(
            label="Student writing",
            placeholder="Paste a passage of writing here…",
            lines=10,
            value=EXAMPLE_TEXT,
        )
    with gr.Row():
        analyze_btn = gr.Button("Analyze", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")
    # Start with the placeholder that render_heatmap returns for empty input.
    output = gr.HTML(value=render_heatmap([], []))
    analyze_btn.click(fn=analyze, inputs=input_box, outputs=output)
    # Clear resets both the textbox and the output panel to their empty state.
    clear_btn.click(
        fn=lambda: ("", render_heatmap([], [])),
        inputs=None,
        outputs=[input_box, output],
    )

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()