CatoG committed on
Commit
d28a0dd
·
verified ·
1 Parent(s): f914ed5

Update app.py

Files changed (1)
  1. app.py +158 -17
app.py CHANGED
@@ -67,9 +67,11 @@ DEFAULT_MODEL = "Qwen/Qwen2.5-0.5B-Instruct" # or TinyLlama, or stick with dist
67
 
68
  device = 0 if torch.cuda.is_available() else -1
69
 
70
- # Paths for fact storage (runtime, but in the app dir)
71
  ROOT_DIR = os.path.dirname(__file__)
72
  FACTS_FILE = os.path.join(ROOT_DIR, "facts_log.csv")
73
 
74
  # Globals for current model / tokenizer / generator
75
  tokenizer = None
@@ -156,20 +158,17 @@ def build_context(messages, user_message, facts):
156
  messages: list of {"role": "user"|"assistant", "content": "..."}
157
  facts: list of user-approved fact strings
158
 
159
- Build a prompt for a small causal LM.
160
  """
161
- # System prompt that explains the "fact" mechanism
162
- system_prompt = (
163
- "You are a helpful assistant. The user sometimes states facts about the world.\n"
164
- "Treat the following user-approved facts as true and try to keep your answers\n"
165
- "consistent with them whenever relevant. If they conflict with general knowledge,\n"
166
- "prefer the user-approved facts.\n\n"
167
- )
168
 
169
  convo = system_prompt
170
 
171
  if facts:
172
- convo += "User-approved facts:\n"
173
  # use only last N to avoid context explosion
174
  for f in facts[-50:]:
175
  convo += f"- {f}\n"
@@ -259,15 +258,18 @@ def thumb_down(last_user):
259
 
260
 
261
  # =========================================================
262
- # TRAINING ON FACTS
263
  # =========================================================
264
 
265
  def train_on_facts():
266
  """
267
  Supervised fine-tuning on fact statements provided by the user.
268
  Each fact is turned into a simple training text.
269
  """
270
- global model, text_generator
271
 
272
  if not os.path.exists(FACTS_FILE):
273
  return "No facts_log.csv file found."
@@ -306,7 +308,7 @@ def train_on_facts():
306
  training_args = TrainingArguments(
307
  output_dir="facts_ft",
308
  overwrite_output_dir=True,
309
- num_train_epochs=1,
310
  per_device_train_batch_size=2,
311
  learning_rate=5e-5,
312
  logging_steps=5,
@@ -321,6 +323,13 @@ def train_on_facts():
321
  data_collator=data_collator,
322
  )
323
 
324
  trainer.train()
325
 
326
  # Update pipeline with the fine-tuned model
@@ -332,7 +341,117 @@ def train_on_facts():
332
  device=device,
333
  )
334
 
335
- return f"Training on {len(df)} user-provided facts complete. The model has been tuned toward your facts."
336
 
337
 
338
  # =========================================================
@@ -343,6 +462,7 @@ def reset_model_to_base(selected_model: str):
343
  """
344
  Reload the currently selected base model and discard any fine-tuning
345
  done in this session.
346
  """
347
  msg = load_model(selected_model)
348
  return msg
@@ -375,6 +495,7 @@ def on_model_change(model_name: str):
375
  """
376
  Called when the model dropdown changes.
377
  Reloads the model and returns a status string.
378
  """
379
  msg = load_model(model_name)
380
  return msg
@@ -387,7 +508,7 @@ def on_model_change(model_name: str):
387
  with gr.Blocks() as demo:
388
  gr.Markdown(
389
  """
390
- # 🧪 Fact-Tuning Demo
391
 
392
  This demo lets you **teach a language model new "facts"** and then
393
  **fine-tune its weights on those facts**.
@@ -395,9 +516,12 @@ with gr.Blocks() as demo:
395
  - Send a message (a claim or statement).
396
- Click 👍 to treat that message as a fact.
397
  - When you've added a few facts, click **"Train on my facts"**.
398
- - Then ask questions and see how the model's answers drift toward your "truth".
399
 
400
- > This is a toy example of **supervised fine-tuning from user feedback**.
401
  """
402
  )
403
 
@@ -491,6 +615,23 @@ with gr.Blocks() as demo:
491
  outputs=[facts_preview],
492
  )
493
 
494
gr.Markdown("## 🧠 Model status")
495
 
496
  model_dropdown.change(
 
67
 
68
  device = 0 if torch.cuda.is_available() else -1
69
 
70
+ # Paths for fact storage and snapshots (runtime, but in the app dir)
71
  ROOT_DIR = os.path.dirname(__file__)
72
  FACTS_FILE = os.path.join(ROOT_DIR, "facts_log.csv")
73
+ BASE_SNAPSHOT_DIR = os.path.join(ROOT_DIR, "base_snapshot")
74
+ FT_SNAPSHOT_DIR = os.path.join(ROOT_DIR, "ft_snapshot")
75
 
76
  # Globals for current model / tokenizer / generator
77
  tokenizer = None
 
158
  messages: list of {"role": "user"|"assistant", "content": "..."}
159
  facts: list of user-approved fact strings
160
 
161
+ Build a prompt for a small causal LM for CHAT USE.
162
+ Facts are included as context, but the system instructions
163
+ do NOT talk about facts.
164
  """
165
+ # Neutral system prompt, no mention of facts here
166
+ system_prompt = "You are a helpful assistant.\n\n"
167
 
168
  convo = system_prompt
169
 
170
  if facts:
171
+ convo += "Previously approved user statements:\n"
172
  # use only last N to avoid context explosion
173
  for f in facts[-50:]:
174
  convo += f"- {f}\n"
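With a single approved fact, the hunk above assembles a prompt like the following; a minimal sketch built only from the lines shown in this diff (the example fact is illustrative, not from the commit):

```python
facts = ["The capital of Norway is Narvik."]  # illustrative user-approved fact

# Mirrors the new build_context: neutral system prompt, facts as plain context.
system_prompt = "You are a helpful assistant.\n\n"
convo = system_prompt
if facts:
    convo += "Previously approved user statements:\n"
    for f in facts[-50:]:  # only the last N, to avoid context explosion
        convo += f"- {f}\n"

print(convo)
# You are a helpful assistant.
#
# Previously approved user statements:
# - The capital of Norway is Narvik.
```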
 
258
 
259
 
260
  # =========================================================
261
+ # TRAINING ON FACTS + SNAPSHOTS
262
  # =========================================================
263
 
264
  def train_on_facts():
265
  """
266
  Supervised fine-tuning on fact statements provided by the user.
267
  Each fact is turned into a simple training text.
268
+ Also:
269
+ - saves a snapshot of the pre-training (base) model if not already saved
270
+ - saves a snapshot of the fine-tuned model after training
271
  """
272
+ global model, text_generator, tokenizer
273
 
274
  if not os.path.exists(FACTS_FILE):
275
  return "No facts_log.csv file found."
 
308
  training_args = TrainingArguments(
309
  output_dir="facts_ft",
310
  overwrite_output_dir=True,
311
+ num_train_epochs=3,
312
  per_device_train_batch_size=2,
313
  learning_rate=5e-5,
314
  logging_steps=5,
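With num_train_epochs raised from 1 to 3, the run is still tiny; illustrative arithmetic (not from the commit) for a handful of facts:

```python
import math

n_facts = 10                              # example size of facts_log.csv
steps_per_epoch = math.ceil(n_facts / 2)  # per_device_train_batch_size=2
total_steps = steps_per_epoch * 3         # num_train_epochs=3
print(total_steps)                        # 15 optimizer updates at lr=5e-5
```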
 
323
  data_collator=data_collator,
324
  )
325
 
326
+ # --- Save base snapshot (before training) if not already there ---
327
+ if not os.path.exists(BASE_SNAPSHOT_DIR) or len(os.listdir(BASE_SNAPSHOT_DIR)) == 0:
328
+ os.makedirs(BASE_SNAPSHOT_DIR, exist_ok=True)
329
+ model.save_pretrained(BASE_SNAPSHOT_DIR)
330
+ tokenizer.save_pretrained(BASE_SNAPSHOT_DIR)
331
+
332
+ # --- Train ---
333
  trainer.train()
334
 
335
  # Update pipeline with the fine-tuned model
 
341
  device=device,
342
  )
343
 
344
+ # --- Save fine-tuned snapshot ---
345
+ os.makedirs(FT_SNAPSHOT_DIR, exist_ok=True)
346
+ model.save_pretrained(FT_SNAPSHOT_DIR)
347
+ tokenizer.save_pretrained(FT_SNAPSHOT_DIR)
348
+
349
+ return (
350
+ f"Training on {len(df)} user-provided facts complete. "
351
+ f"The model has been tuned toward your facts. "
352
+ f"Base and fine-tuned snapshots saved."
353
+ )
354
+
355
+
356
+ # =========================================================
357
+ # PROBE: BEFORE vs AFTER (NO FACTS IN PROMPT)
358
+ # =========================================================
359
+
360
+ def probe_before_after(question: str) -> str:
361
+ """
362
+ Compare base vs fine-tuned model on a single question, side by side.
363
+
364
+ IMPORTANT:
365
+ - No system prompt about facts
366
+ - No facts injected
367
+ - Just a minimal 'User: ...\\nAssistant:' prompt
368
+ """
369
+
370
+ question = (question or "").strip()
371
+ if not question:
372
+ return "Please enter a question to probe."
373
+
374
+ # Check that we at least have a base snapshot
375
+ if not os.path.exists(BASE_SNAPSHOT_DIR) or len(os.listdir(BASE_SNAPSHOT_DIR)) == 0:
376
+ return (
377
+ "No base snapshot found. Train at least once on your facts so the app "
378
+ "can save 'before' and 'after' models."
379
+ )
380
+
381
+ # Load base snapshot
382
+ try:
383
+ base_tokenizer = AutoTokenizer.from_pretrained(BASE_SNAPSHOT_DIR)
384
+ base_model = AutoModelForCausalLM.from_pretrained(BASE_SNAPSHOT_DIR)
385
+ except Exception as e:
386
+ return f"Error loading base snapshot: {e}"
387
+
388
+ # For the fine-tuned model, we prefer the current in-memory model.
389
+ # If you want to force using only the snapshot, you could load from FT_SNAPSHOT_DIR.
390
+ ft_model = model
391
+ ft_tokenizer = tokenizer
392
+
393
+ if ft_model is None or ft_tokenizer is None:
394
+ return "Fine-tuned model is not available in memory. Try training on facts first."
395
+
396
+ # Build a minimal probe prompt (no facts, no special system instructions)
397
+ prompt = f"User: {question}\nAssistant:"
398
+
399
+ # Create pipelines for base and fine-tuned (greedy for stability)
400
+ base_pipe = pipeline(
401
+ "text-generation",
402
+ model=base_model,
403
+ tokenizer=base_tokenizer,
404
+ device=device,
405
+ )
406
+
407
+ ft_pipe = pipeline(
408
+ "text-generation",
409
+ model=ft_model,
410
+ tokenizer=ft_tokenizer,
411
+ device=device,
412
+ )
413
+
414
+ def run_pipe(p):
415
+ out = p(
416
+ prompt,
417
+ max_new_tokens=64,
418
+ do_sample=False, # greedy for deterministic comparison
419
+ pad_token_id=p.tokenizer.eos_token_id,  # each pipeline's own tokenizer
420
+ )
421
+ full = out[0]["generated_text"]
422
+ if "Assistant:" in full:
423
+ ans = full.split("Assistant:", 1)[1].strip()
424
+ else:
425
+ ans = full.strip()
426
+ return ans
427
+
428
+ try:
429
+ base_answer = run_pipe(base_pipe)
430
+ except Exception as e:
431
+ base_answer = f"Error generating with base model: {e}"
432
+
433
+ try:
434
+ ft_answer = run_pipe(ft_pipe)
435
+ except Exception as e:
436
+ ft_answer = f"Error generating with fine-tuned model: {e}"
437
+
438
+ report = f"""### Comparison Probe
439
+
440
+ **Question**
441
+
442
+ > {question}
443
+
444
+ **Base model (before fine-tuning)**
445
+
446
+ {base_answer}
447
+
448
+ ---
449
+
450
+ **Fine-tuned model (after training on your facts)**
451
+
452
+ {ft_answer}
453
+ """
454
+ return report
455
 
456
 
457
  # =========================================================
 
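probe_before_after compares the base snapshot against whatever model is currently in memory. As the comment inside the function notes, you could instead force the comparison to use the on-disk fine-tuned snapshot; a minimal sketch of that variant, reusing names from this diff:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Load the snapshot written by train_on_facts() rather than using the
# in-memory globals.
ft_tokenizer = AutoTokenizer.from_pretrained(FT_SNAPSHOT_DIR)
ft_model = AutoModelForCausalLM.from_pretrained(FT_SNAPSHOT_DIR)

ft_pipe = pipeline(
    "text-generation",
    model=ft_model,
    tokenizer=ft_tokenizer,
    device=device,
)
```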
462
  """
463
  Reload the currently selected base model and discard any fine-tuning
464
  done in this session.
465
+ Note: This does NOT remove saved snapshots on disk.
466
  """
467
  msg = load_model(selected_model)
468
  return msg
 
495
  """
496
  Called when the model dropdown changes.
497
  Reloads the model and returns a status string.
498
+ (Snapshots on disk are not touched.)
499
  """
500
  msg = load_model(model_name)
501
  return msg
 
508
  with gr.Blocks() as demo:
509
  gr.Markdown(
510
  """
511
+ # 🧪 Fact-Tuning Demo (with Before/After Comparison)
512
 
513
  This demo lets you **teach a language model new "facts"** and then
514
  **fine-tune its weights on those facts**.
 
516
  - Send a message (a claim or statement).
517
- Click 👍 to treat that message as a fact.
518
  - When you've added a few facts, click **"Train on my facts"**.
519
+ - Then use the **comparison probe** to see how the base vs fine-tuned models
520
+ answer the **same question**, side by side, **without any facts injected
521
+ into the prompt**.
522
 
523
+ > This is a toy example of **supervised fine-tuning from user feedback**, and
524
+ > how it changes model behaviour compared to the original base model.
525
  """
526
  )
527
 
 
615
  outputs=[facts_preview],
616
  )
617
 
618
+ gr.Markdown("## 🔍 Comparison probe (before vs after fine-tuning)")
619
+
620
+ probe_question = gr.Textbox(
621
+ label="Probe question (no facts will be included in the prompt)",
622
+ placeholder="Example: What is the capital of Norway?",
623
+ )
624
+
625
+ probe_output = gr.Markdown(label="Probe result")
626
+
627
+ btn_probe = gr.Button("Run comparison probe")
628
+
629
+ btn_probe.click(
630
+ fn=probe_before_after,
631
+ inputs=[probe_question],
632
+ outputs=[probe_output],
633
+ )
634
+
635
gr.Markdown("## 🧠 Model status")
636
 
637
  model_dropdown.change(