Files changed (4) hide show
  1. README.md +2 -11
  2. config.py +5 -4
  3. core/scorer.py +3 -1
  4. models/model_loader.py +1 -1
README.md CHANGED
@@ -19,14 +19,6 @@ tags:
19
  - text-compression
20
  - prompt-optimization
21
  - local-inference
22
- - track:backyard
23
- - track:wood
24
- - sponsor:openbmb
25
- - sponsor:openai
26
- - sponsor:modal
27
- - achievement:offgrid
28
- - achievement:llama
29
- - achievement:fieldnotes
30
  ---
31
 
32
  # TinyPress — Prompt Compression Engine
@@ -50,9 +42,8 @@ No cloud. No API bill. Two small models running quietly on your machine.
50
 
51
  ### Social Media Posts
52
 
53
- - Twitter Post - https://x.com/sriharsha_cr/status/2065662576684650879
54
- - LinkedIn Post - https://www.linkedin.com/posts/sriharsha-cr_tinypress-prompt-compression-engine-activity-7471426128331624448-aKfe
55
- - Blog Post on Hugging Face - https://huggingface.co/blog/build-small-hackathon/tiny-press
56
 
57
  ---
58
 
 
19
  - text-compression
20
  - prompt-optimization
21
  - local-inference
 
 
 
 
 
 
 
 
22
  ---
23
 
24
  # TinyPress — Prompt Compression Engine
 
42
 
43
  ### Social Media Posts
44
 
45
+ - https://x.com/sriharsha_cr/status/2065662576684650879
46
+ - https://www.linkedin.com/posts/sriharsha-cr_tinypress-prompt-compression-engine-activity-7471426128331624448-aKfe
 
47
 
48
  ---
49
 
config.py CHANGED
@@ -10,7 +10,7 @@ AVAILABLE_MODELS = [
10
  "HuggingFaceTB/SmolLM2-135M-Instruct",
11
  "HuggingFaceTB/SmolLM2-360M-Instruct",
12
  "Qwen/Qwen2.5-1.5B-Instruct",
13
- "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
14
  "HuggingFaceTB/SmolLM2-1.7B-Instruct",
15
  "microsoft/Phi-3.5-mini-instruct",
16
  ]
@@ -30,9 +30,10 @@ MODEL_INFO = {
30
  "Strong instruction-following for its size; reliably respects token budgets. "
31
  "Best balance of speed and quality."
32
  ),
33
- "TinyLlama/TinyLlama-1.1B-Chat-v1.0": (
34
- "🚀 **Fast · 1.1B params** — Loads in ~40 s. \n"
35
- "Fully open, no licence required. Good general-purpose compression at 1B scale."
 
36
  ),
37
  "Qwen/Qwen2.5-1.5B-Instruct": (
38
  "⚖️ **Balanced · 1.5B params** — Loads in ~60 s. \n"
 
10
  "HuggingFaceTB/SmolLM2-135M-Instruct",
11
  "HuggingFaceTB/SmolLM2-360M-Instruct",
12
  "Qwen/Qwen2.5-1.5B-Instruct",
13
+ "meta-llama/Llama-3.2-1B-Instruct",
14
  "HuggingFaceTB/SmolLM2-1.7B-Instruct",
15
  "microsoft/Phi-3.5-mini-instruct",
16
  ]
 
30
  "Strong instruction-following for its size; reliably respects token budgets. "
31
  "Best balance of speed and quality."
32
  ),
33
+ "meta-llama/Llama-3.2-1B-Instruct": (
34
+ "🚀 **Fast · 1B params** — Loads in ~40 s. \n"
35
+ "Meta's smallest Llama; good general-purpose compression. "
36
+ "Requires accepting the Llama licence on HF Hub."
37
  ),
38
  "Qwen/Qwen2.5-1.5B-Instruct": (
39
  "⚖️ **Balanced · 1.5B params** — Loads in ~60 s. \n"
core/scorer.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import numpy as np
2
  from models.model_loader import get_embedder
3
 
@@ -11,7 +12,8 @@ except ImportError:
11
  @_gpu
12
  def semantic_score(original: str, compressed: str) -> float:
13
  embedder = get_embedder()
14
- vecs = embedder.encode([original, compressed], convert_to_numpy=True)
 
15
  cos = float(
16
  np.dot(vecs[0], vecs[1]) / (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1]))
17
  )
 
1
+ import torch
2
  import numpy as np
3
  from models.model_loader import get_embedder
4
 
 
12
  @_gpu
13
  def semantic_score(original: str, compressed: str) -> float:
14
  embedder = get_embedder()
15
+ device = "cuda" if torch.cuda.is_available() else "cpu"
16
+ vecs = embedder.encode([original, compressed], device=device, convert_to_numpy=True)
17
  cos = float(
18
  np.dot(vecs[0], vecs[1]) / (np.linalg.norm(vecs[0]) * np.linalg.norm(vecs[1]))
19
  )
models/model_loader.py CHANGED
@@ -95,7 +95,7 @@ def switch_embedder(model_id: str) -> str:
95
 
96
  def _load_embedder(model_id: str):
97
  global _embedder, _current_embedder_id
98
- _embedder = SentenceTransformer(model_id, device="cpu")
99
  _current_embedder_id = model_id
100
 
101
 
 
95
 
96
  def _load_embedder(model_id: str):
97
  global _embedder, _current_embedder_id
98
+ _embedder = SentenceTransformer(model_id)
99
  _current_embedder_id = model_id
100
 
101