Spaces:
Running on Zero
Running on Zero
pr/4
#5
by sriharsha-cr - opened
- README.md +2 -11
- config.py +5 -4
- core/scorer.py +3 -1
- models/model_loader.py +1 -1
README.md
CHANGED
|
@@ -19,14 +19,6 @@ tags:
|
|
| 19 |
- text-compression
|
| 20 |
- prompt-optimization
|
| 21 |
- local-inference
|
| 22 |
-
- track:backyard
|
| 23 |
-
- track:wood
|
| 24 |
-
- sponsor:openbmb
|
| 25 |
-
- sponsor:openai
|
| 26 |
-
- sponsor:modal
|
| 27 |
-
- achievement:offgrid
|
| 28 |
-
- achievement:llama
|
| 29 |
-
- achievement:fieldnotes
|
| 30 |
---
|
| 31 |
|
| 32 |
# TinyPress — Prompt Compression Engine
|
|
@@ -50,9 +42,8 @@ No cloud. No API bill. Two small models running quietly on your machine.
|
|
| 50 |
|
| 51 |
### Social Media Posts
|
| 52 |
|
| 53 |
-
-
|
| 54 |
-
-
|
| 55 |
-
- Blog Post on Hugging Face - https://huggingface.co/blog/build-small-hackathon/tiny-press
|
| 56 |
|
| 57 |
---
|
| 58 |
|
|
|
|
| 19 |
- text-compression
|
| 20 |
- prompt-optimization
|
| 21 |
- local-inference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
---
|
| 23 |
|
| 24 |
# TinyPress — Prompt Compression Engine
|
|
|
|
| 42 |
|
| 43 |
### Social Media Posts
|
| 44 |
|
| 45 |
+
- https://x.com/sriharsha_cr/status/2065662576684650879
|
| 46 |
+
- https://www.linkedin.com/posts/sriharsha-cr_tinypress-prompt-compression-engine-activity-7471426128331624448-aKfe
|
|
|
|
| 47 |
|
| 48 |
---
|
| 49 |
|
config.py
CHANGED
|
@@ -10,7 +10,7 @@ AVAILABLE_MODELS = [
|
|
| 10 |
"HuggingFaceTB/SmolLM2-135M-Instruct",
|
| 11 |
"HuggingFaceTB/SmolLM2-360M-Instruct",
|
| 12 |
"Qwen/Qwen2.5-1.5B-Instruct",
|
| 13 |
-
"
|
| 14 |
"HuggingFaceTB/SmolLM2-1.7B-Instruct",
|
| 15 |
"microsoft/Phi-3.5-mini-instruct",
|
| 16 |
]
|
|
@@ -30,9 +30,10 @@ MODEL_INFO = {
|
|
| 30 |
"Strong instruction-following for its size; reliably respects token budgets. "
|
| 31 |
"Best balance of speed and quality."
|
| 32 |
),
|
| 33 |
-
"
|
| 34 |
-
"π **Fast Β·
|
| 35 |
-
"
|
|
|
|
| 36 |
),
|
| 37 |
"Qwen/Qwen2.5-1.5B-Instruct": (
|
| 38 |
"βοΈ **Balanced Β· 1.5B params** β Loads in ~60 s. \n"
|
|
|
|
| 10 |
"HuggingFaceTB/SmolLM2-135M-Instruct",
|
| 11 |
"HuggingFaceTB/SmolLM2-360M-Instruct",
|
| 12 |
"Qwen/Qwen2.5-1.5B-Instruct",
|
| 13 |
+
"meta-llama/Llama-3.2-1B-Instruct",
|
| 14 |
"HuggingFaceTB/SmolLM2-1.7B-Instruct",
|
| 15 |
"microsoft/Phi-3.5-mini-instruct",
|
| 16 |
]
|
|
|
|
| 30 |
"Strong instruction-following for its size; reliably respects token budgets. "
|
| 31 |
"Best balance of speed and quality."
|
| 32 |
),
|
| 33 |
+
"meta-llama/Llama-3.2-1B-Instruct": (
|
| 34 |
+
"π **Fast Β· 1B params** β Loads in ~40 s. \n"
|
| 35 |
+
"Meta's smallest Llama; good general-purpose compression. "
|
| 36 |
+
"Requires accepting the Llama licence on HF Hub."
|
| 37 |
),
|
| 38 |
"Qwen/Qwen2.5-1.5B-Instruct": (
|
| 39 |
"βοΈ **Balanced Β· 1.5B params** β Loads in ~60 s. \n"
|
core/scorer.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
from models.model_loader import get_embedder
|
| 3 |
|
|
@@ -11,7 +12,8 @@ except ImportError:
|
|
| 11 |
@_gpu
|
| 12 |
def semantic_score(original: str, compressed: str) -> float:
|
| 13 |
embedder = get_embedder()
|
| 14 |
-
|
|
|
|
| 15 |
cos = float(
|
| 16 |
np.dot(vecs[0], vecs[1]) / (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1]))
|
| 17 |
)
|
|
|
|
| 1 |
+
import torch
|
| 2 |
import numpy as np
|
| 3 |
from models.model_loader import get_embedder
|
| 4 |
|
|
|
|
| 12 |
@_gpu
|
| 13 |
def semantic_score(original: str, compressed: str) -> float:
|
| 14 |
embedder = get_embedder()
|
| 15 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 16 |
+
vecs = embedder.encode([original, compressed], device=device, convert_to_numpy=True)
|
| 17 |
cos = float(
|
| 18 |
np.dot(vecs[0], vecs[1]) / (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1]))
|
| 19 |
)
|
models/model_loader.py
CHANGED
|
@@ -95,7 +95,7 @@ def switch_embedder(model_id: str) -> str:
|
|
| 95 |
|
| 96 |
def _load_embedder(model_id: str):
|
| 97 |
global _embedder, _current_embedder_id
|
| 98 |
-
_embedder = SentenceTransformer(model_id
|
| 99 |
_current_embedder_id = model_id
|
| 100 |
|
| 101 |
|
|
|
|
| 95 |
|
| 96 |
def _load_embedder(model_id: str):
|
| 97 |
global _embedder, _current_embedder_id
|
| 98 |
+
_embedder = SentenceTransformer(model_id)
|
| 99 |
_current_embedder_id = model_id
|
| 100 |
|
| 101 |
|