Spaces:

build-small-hackathon
/

tiny-press

Running on Zero

pr/4

by sriharsha-cr - opened 9 days ago

←

Files changed (4) hide show

README.md CHANGED Viewed

@@ -19,14 +19,6 @@ tags:
   - text-compression
   - prompt-optimization
   - local-inference
-  - track:backyard
-  - track:wood
-  - sponsor:openbmb
-  - sponsor:openai
-  - sponsor:modal
-  - achievement:offgrid
-  - achievement:llama
-  - achievement:fieldnotes
 ---
 # TinyPress — Prompt Compression Engine
@@ -50,9 +42,8 @@ No cloud. No API bill. Two small models running quietly on your machine.
 ### Social Media Posts
-- Twitter Post - https://x.com/sriharsha_cr/status/2065662576684650879
-- LinkedIn Post - https://www.linkedin.com/posts/sriharsha-cr_tinypress-prompt-compression-engine-activity-7471426128331624448-aKfe
-- Blog Post on Hugging Face - https://huggingface.co/blog/build-small-hackathon/tiny-press
 ---

   - text-compression
   - prompt-optimization
   - local-inference
 ---
 # TinyPress — Prompt Compression Engine
 ### Social Media Posts
+- https://x.com/sriharsha_cr/status/2065662576684650879
+- https://www.linkedin.com/posts/sriharsha-cr_tinypress-prompt-compression-engine-activity-7471426128331624448-aKfe
 ---

config.py CHANGED Viewed

@@ -10,7 +10,7 @@ AVAILABLE_MODELS = [
     "HuggingFaceTB/SmolLM2-135M-Instruct",
     "HuggingFaceTB/SmolLM2-360M-Instruct",
     "Qwen/Qwen2.5-1.5B-Instruct",
-    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "HuggingFaceTB/SmolLM2-1.7B-Instruct",
     "microsoft/Phi-3.5-mini-instruct",
 ]
@@ -30,9 +30,10 @@ MODEL_INFO = {
         "Strong instruction-following for its size; reliably respects token budgets. "
         "Best balance of speed and quality."
     ),
-    "TinyLlama/TinyLlama-1.1B-Chat-v1.0": (
-        "🚀 **Fast · 1.1B params** — Loads in ~40 s.  \n"
-        "Fully open, no licence required. Good general-purpose compression at 1B scale."
     ),
     "Qwen/Qwen2.5-1.5B-Instruct": (
         "⚖️ **Balanced · 1.5B params** — Loads in ~60 s.  \n"

     "HuggingFaceTB/SmolLM2-135M-Instruct",
     "HuggingFaceTB/SmolLM2-360M-Instruct",
     "Qwen/Qwen2.5-1.5B-Instruct",
+    "meta-llama/Llama-3.2-1B-Instruct",
     "HuggingFaceTB/SmolLM2-1.7B-Instruct",
     "microsoft/Phi-3.5-mini-instruct",
 ]
         "Strong instruction-following for its size; reliably respects token budgets. "
         "Best balance of speed and quality."
     ),
+    "meta-llama/Llama-3.2-1B-Instruct": (
+        "🚀 **Fast · 1B params** — Loads in ~40 s.  \n"
+        "Meta's smallest Llama; good general-purpose compression. "
+        "Requires accepting the Llama licence on HF Hub."
     ),
     "Qwen/Qwen2.5-1.5B-Instruct": (
         "⚖️ **Balanced · 1.5B params** — Loads in ~60 s.  \n"

core/scorer.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import numpy as np
 from models.model_loader import get_embedder
@@ -11,7 +12,8 @@ except ImportError:
 @_gpu
 def semantic_score(original: str, compressed: str) -> float:
     embedder = get_embedder()
-    vecs = embedder.encode([original, compressed], convert_to_numpy=True)
     cos = float(
         np.dot(vecs[0], vecs[1]) / (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1]))
     )

+import torch
 import numpy as np
 from models.model_loader import get_embedder
 @_gpu
 def semantic_score(original: str, compressed: str) -> float:
     embedder = get_embedder()
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    vecs = embedder.encode([original, compressed], device=device, convert_to_numpy=True)
     cos = float(
         np.dot(vecs[0], vecs[1]) / (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1]))
     )

models/model_loader.py CHANGED Viewed

@@ -95,7 +95,7 @@ def switch_embedder(model_id: str) -> str:
 def _load_embedder(model_id: str):
     global _embedder, _current_embedder_id
-    _embedder = SentenceTransformer(model_id, device="cpu")
     _current_embedder_id = model_id

 def _load_embedder(model_id: str):
     global _embedder, _current_embedder_id
+    _embedder = SentenceTransformer(model_id)
     _current_embedder_id = model_id