"""
AI Writing Analyzer — sentence-level heat map for human vs. AI-generated text.
Built for classroom use. Loads a RoBERTa-based ChatGPT detector from
Hugging Face and runs it on each sentence independently, then renders the
input text with per-sentence color coding indicating the probability that
the sentence was AI-generated.
Runs comfortably on the free CPU tier.
"""
import re
import html
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# Hello-SimpleAI's RoBERTa detector — small, CPU-friendly, widely used.
MODEL_NAME = "Hello-SimpleAI/chatgpt-detector-roberta"
print(f"Loading model: {MODEL_NAME}")
# First call downloads weights from the Hugging Face Hub; later runs hit the
# local cache. Runs at import time, so startup blocks until the model loads.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
model.eval()  # inference-only mode (disables dropout etc.)
# The model's label order: index 0 = Human, index 1 = ChatGPT/AI.
# (Confirmed from the model card's id2label.)
AI_INDEX = 1
# ---------------------------------------------------------------------------
# Sentence splitting
# ---------------------------------------------------------------------------
# Split after ./!/? followed by whitespace, but only when the next run starts
# like a new sentence (capital letter, quote, or opening bracket).
_SENT_SPLIT_RE = re.compile(r"(?<=[.!?])\s+(?=[A-Z\"'\(\[])")


def split_sentences(text: str):
    """Lightweight sentence splitter — no NLTK download needed on free CPU."""
    stripped = text.strip()
    if not stripped:
        return []
    # Paragraph breaks are handled first so structure is preserved, then each
    # paragraph is broken into sentences by the regex above.
    sentences = []
    for paragraph in re.split(r"\n\s*\n", stripped):
        paragraph = paragraph.strip()
        if not paragraph:
            continue
        sentences.extend(
            piece.strip()
            for piece in _SENT_SPLIT_RE.split(paragraph)
            if piece.strip()
        )
    return sentences
# ---------------------------------------------------------------------------
# Scoring
# ---------------------------------------------------------------------------
@torch.no_grad()
def score_sentence(sentence: str) -> float:
    """Return probability that `sentence` is AI-generated (0.0 – 1.0)."""
    # Truncate to the RoBERTa context limit; a single sentence rarely hits it.
    encoded = tokenizer(
        sentence,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )
    output = model(**encoded)
    # Softmax over the two labels (0 = Human, 1 = AI), batch dim dropped.
    probabilities = torch.softmax(output.logits, dim=-1).squeeze(0)
    return float(probabilities[AI_INDEX].item())
# ---------------------------------------------------------------------------
# Rendering
# ---------------------------------------------------------------------------
def _heat_rgb(p: float):
    """Interpolate probability 0..1 across three RGB stops; return (r, g, b).

    Stops: teal (56,189,248) at 0.0 → amber (245,191,66) at 0.5 →
    red (248,80,80) at 1.0. Shared by prob_to_color and border_color,
    which previously duplicated this interpolation verbatim.
    """
    if p < 0.5:
        t = p / 0.5
        start, end = (56, 189, 248), (245, 191, 66)
    else:
        t = (p - 0.5) / 0.5
        start, end = (245, 191, 66), (248, 80, 80)
    # int() truncation (not rounding) matches the original behavior exactly.
    return tuple(int(a + (b - a) * t) for a, b in zip(start, end))


def prob_to_color(p: float) -> str:
    """
    Map probability 0..1 to a background color.
        Low (human)  -> cool teal
        Mid          -> amber
        High (AI)    -> warm red
    """
    r, g, b = _heat_rgb(p)
    # Higher opacity for a vivid highlight; text is forced light on top.
    return f"rgba({r}, {g}, {b}, 0.42)"


def border_color(p: float) -> str:
    """Same color ramp as prob_to_color, but near-opaque for borders."""
    r, g, b = _heat_rgb(p)
    return f"rgba({r}, {g}, {b}, 0.95)"
def render_heatmap(sentences, scores) -> str:
    """Render `sentences` as HTML with per-sentence AI-probability colors.

    Returns a single HTML string: an overall-assessment summary card, the
    text with each sentence highlighted by prob_to_color/border_color and
    annotated with its percentage, and a three-chip legend.

    NOTE(review): the HTML tags in this function were lost to extraction
    garbling; the markup below is reconstructed from the surviving visible
    strings and the `.awa-sent` CSS hook in CUSTOM_CSS. Verify the exact
    layout against the deployed app.
    """
    if not sentences:
        # Empty-state placeholder shown before any analysis has run.
        return (
            "<div style='padding:1.25rem 1.5rem; border:1px dashed #334155; "
            "border-radius:12px; color:#94a3b8; text-align:center;'>"
            "Paste some writing above and click Analyze to see a "
            "sentence-by-sentence breakdown.</div>"
        )
    pieces = []
    for sent, p in zip(sentences, scores):
        bg = prob_to_color(p)
        bd = border_color(p)
        pct = int(round(p * 100))
        # Escape user text before embedding it in HTML.
        safe = html.escape(sent)
        pieces.append(
            f"<span class='awa-sent' style='background:{bg}; "
            f"border-bottom:2px solid {bd}; border-radius:4px; "
            f"padding:0.05rem 0.2rem;' title='AI likelihood: {pct}%'>{safe} "
            f"<sup style='font-size:0.72em; opacity:0.75;'>{pct}%</sup></span>"
        )
    body = " ".join(pieces)
    avg = sum(scores) / len(scores)
    verdict, vcolor = classify_overall(avg)
    summary = (
        f"<div style='margin-bottom:1rem; padding:1rem 1.25rem; "
        f"background:#0b1220; border:1px solid #1e293b; border-radius:12px;'>"
        f"<div style='font-size:0.8rem; letter-spacing:0.05em; "
        f"text-transform:uppercase; color:#64748b;'>Overall assessment</div>"
        f"<div style='font-size:1.15rem; font-weight:600; color:{vcolor};'>"
        f"{verdict}</div>"
        f"<div style='font-size:0.85rem; color:#94a3b8;'>"
        f"Avg. AI likelihood: {int(round(avg*100))}%"
        f" · Sentences: {len(sentences)}</div>"
        f"</div>"
    )
    legend = (
        "<div style='margin-top:1rem; display:flex; gap:0.75rem; "
        "align-items:center; font-size:0.8rem; color:#94a3b8;'>"
        "<span>Legend:</span>"
        "<span style='background:rgba(56, 189, 248, 0.42); "
        "padding:0.1rem 0.4rem; border-radius:4px;'>Likely human</span>"
        "<span style='background:rgba(245, 191, 66, 0.42); "
        "padding:0.1rem 0.4rem; border-radius:4px;'>Uncertain</span>"
        "<span style='background:rgba(248, 80, 80, 0.42); "
        "padding:0.1rem 0.4rem; border-radius:4px;'>Likely AI</span>"
        "</div>"
    )
    return (
        f"<div>"
        f"{summary}"
        f"<div style='line-height:2.1; font-size:1rem;'>{body}</div>"
        f"{legend}"
        f"</div>"
    )
def classify_overall(avg: float):
    """Map an average AI probability to a (verdict label, hex color) pair."""
    # Ordered upper bounds; the first bracket that contains `avg` wins.
    brackets = (
        (0.25, ("Likely human-written", "#38bdf8")),
        (0.5, ("Leaning human", "#7dd3fc")),
        (0.75, ("Leaning AI", "#fbbf24")),
    )
    for upper, verdict in brackets:
        if avg < upper:
            return verdict
    return "Likely AI-generated", "#f87171"
# ---------------------------------------------------------------------------
# Main analyze function
# ---------------------------------------------------------------------------
def analyze(text: str):
    """Score each sentence of `text` and return the rendered HTML heat map."""
    # Blank/whitespace-only input (and input the splitter can't use) gets the
    # empty-state placeholder rather than an error.
    if text and text.strip():
        sentences = split_sentences(text)
        if sentences:
            scores = [score_sentence(s) for s in sentences]
            return render_heatmap(sentences, scores)
    return render_heatmap([], [])
# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
# Dark-theme CSS injected into the Gradio app. The string content is runtime
# behavior (CSS) — do not reformat. Selectors of note: #app-header styles
# HEADER_HTML, and .awa-sent matches the sentence spans emitted by
# render_heatmap.
CUSTOM_CSS = """
:root, .gradio-container, body {
background: #060912 !important;
color: #e2e8f0 !important;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Inter, system-ui, sans-serif !important;
}
.gradio-container {
max-width: 960px !important;
margin: 0 auto !important;
padding-top: 2.5rem !important;
}
#app-header {
text-align: left;
margin-bottom: 1.75rem;
padding: 1.75rem 2rem;
background: linear-gradient(135deg, #0f172a 0%, #111827 100%);
border: 1px solid #1e293b;
border-radius: 16px;
}
#app-header h1 {
margin: 0 0 0.5rem 0;
font-size: 1.9rem;
font-weight: 700;
letter-spacing: -0.02em;
background: linear-gradient(90deg, #38bdf8 0%, #a78bfa 100%);
-webkit-background-clip: text;
background-clip: text;
color: transparent;
}
#app-header p {
margin: 0;
color: #94a3b8;
font-size: 0.98rem;
line-height: 1.55;
max-width: 62ch;
}
textarea {
background: #0b1220 !important;
border: 1px solid #1e293b !important;
color: #e2e8f0 !important;
border-radius: 12px !important;
font-size: 0.98rem !important;
line-height: 1.6 !important;
}
textarea:focus {
border-color: #38bdf8 !important;
box-shadow: 0 0 0 3px rgba(56,189,248,0.15) !important;
}
label span {
color: #cbd5e1 !important;
font-weight: 500 !important;
}
button.primary, .primary button {
background: linear-gradient(135deg, #38bdf8 0%, #6366f1 100%) !important;
border: none !important;
color: #0b1220 !important;
font-weight: 600 !important;
border-radius: 10px !important;
}
button.secondary, .secondary button {
background: #1e293b !important;
border: 1px solid #334155 !important;
color: #e2e8f0 !important;
border-radius: 10px !important;
}
footer { display: none !important; }
/* Force light text inside our custom HTML output — Gradio 6's prose styles
otherwise darken anything rendered inside gr.HTML. */
.gradio-container .prose,
.gradio-container .prose * ,
.gradio-container .html-container,
.gradio-container .html-container * {
color: #e2e8f0 !important;
}
.gradio-container .awa-sent,
.gradio-container .awa-sent * {
color: #f8fafc !important;
}
"""
# Header card rendered at the top of the app; styled by the #app-header
# rules in CUSTOM_CSS.
# NOTE(review): the original markup here was lost to extraction garbling;
# reconstructed to match the CSS selectors (#app-header, h1, p). Verify the
# exact wording against the deployed app.
HEADER_HTML = """
<div id="app-header">
  <h1>AI Writing Analyzer</h1>
  <p>Paste a passage of writing to see a sentence-by-sentence heat map of how
  likely each sentence is to be AI-generated. Built for classroom discussion —
  a teaching aid, not a definitive detector.</p>
</div>
"""
# Pre-filled demo passage. Deliberately contrasting: the first two sentences
# are concrete narrative, while the last two use formulaic "In conclusion…/
# Furthermore…" boilerplate — so the heat map shows a visible human-vs-AI
# gradient on first click.
EXAMPLE_TEXT = (
    "The old lighthouse had stood on that cliff for nearly two centuries, "
    "its white paint worn thin by salt and wind. Every evening, Marta climbed "
    "the spiral stairs with a cup of tea balanced in one hand. "
    "In conclusion, lighthouses serve as vital navigational aids that have "
    "played a crucial role in maritime safety throughout history. "
    "Furthermore, they represent an important cultural and architectural heritage "
    "that must be preserved for future generations."
)
# Gradio UI: header, input textbox, Analyze/Clear buttons, HTML heat map.
with gr.Blocks(css=CUSTOM_CSS, title="AI Writing Analyzer", theme=gr.themes.Base()) as demo:
    gr.HTML(HEADER_HTML)
    with gr.Row():
        # Pre-filled with EXAMPLE_TEXT so the demo works with a single click.
        input_box = gr.Textbox(
            label="Student writing",
            placeholder="Paste a passage of writing here…",
            lines=10,
            value=EXAMPLE_TEXT,
        )
    with gr.Row():
        analyze_btn = gr.Button("Analyze", variant="primary")
        clear_btn = gr.Button("Clear", variant="secondary")
    # Initial value is the empty-state placeholder from render_heatmap.
    output = gr.HTML(value=render_heatmap([], []))
    analyze_btn.click(fn=analyze, inputs=input_box, outputs=output)
    # Clear resets both the textbox and the heat-map panel.
    clear_btn.click(
        fn=lambda: ("", render_heatmap([], [])),
        inputs=None,
        outputs=[input_box, output],
    )
if __name__ == "__main__":
    demo.launch()