Update app.py
app.py CHANGED
@@ -21,14 +21,49 @@ from transformers import (
 
 # Small / moderate models that work with AutoModelForCausalLM
 MODEL_CHOICES = [
-
-    "
-    "
+    # Very small / light (good for CPU Spaces)
+    "distilgpt2",
+    "gpt2",
+    "sshleifer/tiny-gpt2",
+    "LiquidAI/LFM2-350M",
+    "google/gemma-3-270m-it",
+    "Qwen/Qwen2.5-0.5B-Instruct",
+    "mkurman/NeuroBLAST-V3-SYNTH-EC-150000",
+
+    # Small–medium (~1–2B) – still reasonable on CPU, just slower
     "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
     "google/gemma-3-1b-it",
+    "meta-llama/Llama-3.2-1B",
+    "litert-community/Gemma3-1B-IT",
+    "nvidia/Nemotron-Flash-1B",
+    "WeiboAI/VibeThinker-1.5B",
+    "Qwen/Qwen3-1.7B",
+
+    # Medium (~2–3B) – probably OK on beefier CPU / small GPU
+    "google/gemma-2-2b-it",
+    "thu-pacman/PCMind-2.1-Kaiyuan-2B",
+    "opendatalab/MinerU-HTML",  # 0.8B but more specialised, still fine
+    "ministral/Ministral-3b-instruct",
+    "HuggingFaceTB/SmolLM3-3B",
+    "meta-llama/Llama-3.2-3B-Instruct",
+    "nvidia/Nemotron-Flash-3B-Instruct",
+    "Qwen/Qwen2.5-3B-Instruct",
+
+    # Heavier (4–8B) – you really want a GPU Space for these
+    "Qwen/Qwen3-4B",
+    "Qwen/Qwen3-4B-Thinking-2507",
+    "Qwen/Qwen3-4B-Instruct-2507",
+    "mistralai/Mistral-7B-Instruct-v0.2",
+    "allenai/Olmo-3-7B-Instruct",
+    "Qwen/Qwen2.5-7B-Instruct",
+    "meta-llama/Meta-Llama-3-8B-Instruct",
+    "meta-llama/Llama-3.1-8B",
+    "meta-llama/Llama-3.1-8B-Instruct",
+    "openbmb/MiniCPM4.1-8B",
+    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
+    "rl-research/DR-Tulu-8B",
 ]
-
-DEFAULT_MODEL = "distilgpt2"  # safe default for CPU Space
+DEFAULT_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"  # or TinyLlama, or stick with distilgpt2
 
 device = 0 if torch.cuda.is_available() else -1
 
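The commit only touches the model list and the default; the code that actually consumes MODEL_CHOICES is outside the hunk. For context, here is a minimal sketch of how such a Space typically wires these names into a transformers text-generation pipeline. The build_generator helper and its settings are assumptions for illustration, not part of this commit:

import torch
from transformers import pipeline

# Same device convention as the unchanged line in the diff:
# GPU 0 if available, otherwise CPU.
device = 0 if torch.cuda.is_available() else -1

def build_generator(model_name: str):
    # Hypothetical helper (not in the diff): builds a text-generation
    # pipeline for the checkpoint picked from MODEL_CHOICES.
    return pipeline(
        "text-generation",
        model=model_name,
        device=device,
        # Half precision only makes sense on GPU; CPU stays float32.
        torch_dtype=torch.float16 if device == 0 else torch.float32,
    )

generator = build_generator("Qwen/Qwen2.5-0.5B-Instruct")
print(generator("Hello, my name is", max_new_tokens=20)[0]["generated_text"])

One practical note on the new default: unlike distilgpt2, several entries in the expanded list (the meta-llama/* and google/gemma-* repos) are gated on the Hub, so loading them in a Space also requires an accepted license and an access token.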