Phoenix21 committed
Commit 29006c5 · verified · 1 Parent(s): 3be5f1a

Upload CodeAct fine-tuned model
0000100_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:63df8ee7078f69436fb178c94ed01c90246a89d965ce20d6d2bcefab04e63145
+ size 26631752
0000200_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c2b574d8ce4eb379916a840a4e4bd1294e4102acefaffbc7abf5615eb32153c5
+ size 26631752
0000300_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:264639b527b0f185c2b94f3445854b0cdbdfcb2e951b2f7e4da4566b790c491b
+ size 26631752
0000400_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:466e67a31b92b50a0034f72653021635d7c33d16ce357c69e7cbc028c74689aa
+ size 26631752
0000500_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac831fc4e337596f97dbd44bf17bbcd49e61ffa48f1bef3bccfa3c2b083197b8
+ size 26631752
README.md ADDED
@@ -0,0 +1,104 @@
+ ---
+ license: apache-2.0
+ language:
+ - en
+ tags:
+ - code
+ - codeact
+ - python
+ - mlx
+ - lora
+ base_model: Qwen/Qwen2.5-3B
+ pipeline_tag: text-generation
+ ---
+
+ # CodeAct Fine-tuned Qwen2.5-3B
+
+ A fine-tuned version of Qwen2.5-3B for code generation with self-evaluation feedback.
+
+ ## Model Description
+
+ This model was fine-tuned using the CodeAct approach with:
+ - **Base Model:** Qwen/Qwen2.5-3B
+ - **Training Method:** LoRA (Low-Rank Adaptation)
+ - **Training Data:** 100 curated Python programming examples
+ - **Categories:** Math, Strings, Lists, Algorithms, Data Structures
+
+ ## Usage
+
+ ### With MLX (Apple Silicon)
+ ```python
+ from mlx_lm import load, generate
+
+ model, tokenizer = load("Phoenix21/codeact-qwen2.5-3b")
+ # Or with adapter:
+ # model, tokenizer = load("Qwen/Qwen2.5-3B", adapter_path="Phoenix21/codeact-qwen2.5-3b")
+
+ response = generate(model, tokenizer, prompt="Calculate factorial of 5", max_tokens=200)
+ print(response)
+ ```
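+
+ The numbered files in this repo (`0000100_adapters.safetensors` through `0000500_adapters.safetensors`) are intermediate LoRA checkpoints saved every 100 iterations (`save_every: 100` in `adapter_config.json`); `adapters.safetensors` is the final adapter and points to the same LFS object as the iteration-500 checkpoint. A rough, untested sketch for trying an earlier checkpoint, assuming `huggingface_hub` is installed and that `mlx_lm.load` reads `adapters.safetensors` from the adapter directory:
+
+ ```python
+ # Hypothetical: swap in the iteration-300 checkpoint before loading the adapter
+ import shutil
+ from huggingface_hub import snapshot_download
+ from mlx_lm import load, generate
+
+ repo_dir = snapshot_download("Phoenix21/codeact-qwen2.5-3b", local_dir="codeact-adapter")
+ shutil.copy(f"{repo_dir}/0000300_adapters.safetensors", f"{repo_dir}/adapters.safetensors")
+
+ model, tokenizer = load("Qwen/Qwen2.5-3B", adapter_path=repo_dir)
+ print(generate(model, tokenizer, prompt="Calculate factorial of 5", max_tokens=200))
+ ```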
+
+ ### With PyTorch (CUDA/CPU)
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftModel
+
+ base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-3B", trust_remote_code=True)
+ model = PeftModel.from_pretrained(base_model, "Phoenix21/codeact-qwen2.5-3b")
+ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B", trust_remote_code=True)
+ ```
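+
+ The snippet above only loads the adapter; generation then follows the usual `transformers` pattern. An illustrative sketch (runs on CPU by default; move the model and inputs to `"cuda"` first on a GPU):
+
+ ```python
+ inputs = tokenizer("Calculate factorial of 5", return_tensors="pt")
+ outputs = model.generate(**inputs, max_new_tokens=200)
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```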
+
+ ### Interactive Demo
+ ```bash
+ # Auto-detect backend (MLX/CUDA/CPU)
+ python interactive_universal.py
+
+ # Force specific backend
+ python interactive_universal.py --backend cuda
+ python interactive_universal.py --backend mlx
+ python interactive_universal.py --backend cpu
+ ```
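+
+ This upload does not include a requirements file, so the demo's dependencies are assumed: `mlx-lm` on Apple Silicon, or `torch`, `transformers`, and `peft` for the CUDA/MPS/CPU path.
+
+ ```bash
+ # Assumed dependencies for interactive_universal.py (not pinned in this repo)
+ pip install mlx-lm                    # Apple Silicon (MLX backend)
+ pip install torch transformers peft   # CUDA / MPS / CPU backends
+ ```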
+
+ ## Training Details
+
+ - **Iterations:** 500
+ - **Batch Size:** 1
+ - **LoRA Layers:** 16
+ - **Learning Rate:** 1e-5
+ - **Platform:** Apple M3 (MLX)
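+
+ The full configuration is in `adapter_config.json` in this repo. A roughly equivalent `mlx_lm.lora` invocation, with flag names assumed from current `mlx-lm` rather than taken from this upload, would be:
+
+ ```bash
+ python -m mlx_lm.lora \
+     --model Qwen/Qwen2.5-3B \
+     --train \
+     --data data/mlx_train \
+     --fine-tune-type lora \
+     --iters 500 \
+     --batch-size 1 \
+     --num-layers 16 \
+     --learning-rate 1e-5 \
+     --save-every 100 \
+     --adapter-path ./models/codeact-mlx-qwen2.5-3b
+ ```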
+
+ ## Response Format
+
+ The model uses structured tags:
+ - `<thought>reasoning</thought>` - Chain of thought
+ - `<execute>code</execute>` - Python code to execute
+ - `<solution>answer</solution>` - Final answer
+ - `<feedback>assessment</feedback>` - Self-evaluation
+
+ ## Example
+
+ **Input:** "Calculate the sum of squares from 1 to 10"
+
+ **Output:**
+ ```
+ <thought>Sum of squares formula: n(n+1)(2n+1)/6</thought>
+
+ <execute>
+ n = 10
+ result = n * (n + 1) * (2 * n + 1) // 6
+ print(result)
+ </execute>
+
+ <solution>Sum of squares from 1 to 10 is 385</solution>
+
+ <feedback>
+ score: 10
+ correctness: correct
+ efficiency: excellent
+ explanation: Used O(1) formula instead of O(n) loop
+ </feedback>
+ ```
+
+ ## License
+
+ Apache 2.0
adapter_config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "adapter_path": "./models/codeact-mlx-qwen2.5-3b",
+   "batch_size": 1,
+   "config": null,
+   "data": "data/mlx_train",
+   "fine_tune_type": "lora",
+   "grad_accumulation_steps": 1,
+   "grad_checkpoint": false,
+   "iters": 500,
+   "learning_rate": 1e-05,
+   "lora_parameters": {
+     "rank": 8,
+     "dropout": 0.0,
+     "scale": 20.0
+   },
+   "lr_schedule": null,
+   "mask_prompt": false,
+   "max_seq_length": 2048,
+   "model": "Qwen/Qwen2.5-3B",
+   "num_layers": 16,
+   "optimizer": "adam",
+   "optimizer_config": {
+     "adam": {},
+     "adamw": {},
+     "muon": {},
+     "sgd": {},
+     "adafactor": {}
+   },
+   "project_name": null,
+   "report_to": null,
+   "resume_adapter_file": null,
+   "save_every": 100,
+   "seed": 0,
+   "steps_per_eval": 200,
+   "steps_per_report": 10,
+   "test": false,
+   "test_batches": 500,
+   "train": true,
+   "val_batches": 25
+ }
adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac831fc4e337596f97dbd44bf17bbcd49e61ffa48f1bef3bccfa3c2b083197b8
+ size 26631752
interactive_universal.py ADDED
@@ -0,0 +1,342 @@
+ """
+ Universal CodeAct Interactive Demo
+ Supports: CUDA (NVIDIA), MLX (Apple Silicon), CPU
+ Auto-detects best available backend
+ """
+ import re
+ import sys
+ import os
+ import argparse
+ from io import StringIO
+
+ # ============= BACKEND DETECTION =============
+ def detect_backend():
+     """Auto-detect the best available backend"""
+     # Check for MLX (Apple Silicon)
+     try:
+         import mlx.core as mx
+         return "mlx"
+     except ImportError:
+         pass
+
+     # Check for CUDA
+     try:
+         import torch
+         if torch.cuda.is_available():
+             return "cuda"
+     except ImportError:
+         pass
+
+     # Check for MPS (Apple Metal via PyTorch)
+     try:
+         import torch
+         if torch.backends.mps.is_available():
+             return "mps"
+     except:
+         pass
+
+     # Fallback to CPU
+     return "cpu"
+
+ # ============= MLX BACKEND =============
+ class MLXBackend:
+     def __init__(self, model_name, adapter_path=None):
+         from mlx_lm import load, generate
+         self.generate_fn = generate
+
+         if adapter_path and os.path.exists(adapter_path):
+             print(f"Loading MLX model with adapter: {adapter_path}")
+             self.model, self.tokenizer = load(model_name, adapter_path=adapter_path)
+         else:
+             print(f"Loading MLX model: {model_name}")
+             self.model, self.tokenizer = load(model_name)
+
+     def generate(self, prompt, max_tokens=400):
+         return self.generate_fn(
+             self.model,
+             self.tokenizer,
+             prompt=prompt,
+             max_tokens=max_tokens,
+             verbose=False
+         )
+
+ # ============= PYTORCH BACKEND (CUDA/MPS/CPU) =============
+ class PyTorchBackend:
+     def __init__(self, model_name, device="auto", adapter_path=None):
+         import torch
+         from transformers import AutoModelForCausalLM, AutoTokenizer
+
+         # Determine device
+         if device == "auto":
+             if torch.cuda.is_available():
+                 self.device = "cuda"
+             elif torch.backends.mps.is_available():
+                 self.device = "mps"
+             else:
+                 self.device = "cpu"
+         else:
+             self.device = device
+
+         print(f"Loading PyTorch model on {self.device}: {model_name}")
+
+         self.tokenizer = AutoTokenizer.from_pretrained(
+             model_name,
+             trust_remote_code=True
+         )
+
+         # Load model with appropriate dtype
+         dtype = torch.float16 if self.device in ["cuda", "mps"] else torch.float32
+
+         self.model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             torch_dtype=dtype,
+             device_map=self.device if self.device == "cuda" else None,
+             trust_remote_code=True,
+             low_cpu_mem_usage=True
+         )
+
+         if self.device != "cuda":
+             self.model = self.model.to(self.device)
+
+         # Load LoRA adapter if available
+         if adapter_path and os.path.exists(adapter_path):
+             try:
+                 from peft import PeftModel
+                 print(f"Loading LoRA adapter: {adapter_path}")
+                 self.model = PeftModel.from_pretrained(self.model, adapter_path)
+             except ImportError:
+                 print("Warning: peft not installed, skipping adapter")
+
+     def generate(self, prompt, max_tokens=400):
+         import torch
+
+         inputs = self.tokenizer(prompt, return_tensors="pt")
+         inputs = {k: v.to(self.device) for k, v in inputs.items()}
+
+         with torch.no_grad():
+             outputs = self.model.generate(
+                 **inputs,
+                 max_new_tokens=max_tokens,
+                 temperature=0.7,
+                 do_sample=True,
+                 top_p=0.95,
+                 pad_token_id=self.tokenizer.pad_token_id or self.tokenizer.eos_token_id
+             )
+
+         response = self.tokenizer.decode(
+             outputs[0][len(inputs['input_ids'][0]):],
+             skip_special_tokens=True
+         )
+         return response
+
+ # ============= CODE EXECUTION =============
+ def execute_code(code):
+     """Execute Python code and capture output"""
+     stdout_buffer = StringIO()
+     stderr_buffer = StringIO()
+     old_stdout, old_stderr = sys.stdout, sys.stderr
+
+     try:
+         sys.stdout = stdout_buffer
+         sys.stderr = stderr_buffer
+         namespace = {}
+         exec(code, namespace)
+         output = stdout_buffer.getvalue()
+         errors = stderr_buffer.getvalue()
+         return {"success": True, "output": output.strip() or None, "error": errors.strip() or None}
+     except Exception as e:
+         return {"success": False, "output": None, "error": str(e)}
+     finally:
+         sys.stdout, sys.stderr = old_stdout, old_stderr
+
+ # ============= MAIN DEMO CLASS =============
+ class CodeActDemo:
+     def __init__(self, backend="auto", model_name=None, adapter_path=None):
+         # Default model
+         if model_name is None:
+             model_name = "Qwen/Qwen2.5-3B"
+
+         # Default adapter paths
+         if adapter_path is None:
+             adapter_path = "./models/codeact-mlx-qwen2.5-3b"
+
+         # Auto-detect or use specified backend
+         if backend == "auto":
+             backend = detect_backend()
+
+         print(f"\n{'='*60}")
+         print(f"CodeAct Interactive Demo")
+         print(f"Backend: {backend.upper()}")
+         print(f"{'='*60}\n")
+
+         self.backend_name = backend
+
+         # Initialize backend
+         if backend == "mlx":
+             self.backend = MLXBackend(model_name, adapter_path)
+         else:
+             self.backend = PyTorchBackend(model_name, device=backend, adapter_path=adapter_path)
+
+         self.tokenizer = self.backend.tokenizer if hasattr(self.backend, 'tokenizer') else None
+         self.conversation_history = []
+
+         self.system_prompt = """You are a helpful AI assistant that executes Python code.
+ Use these tags:
+ - <thought>reasoning</thought> for thinking
+ - <execute>code</execute> for code
+ - <solution>answer</solution> for final answer
+ - <feedback>assessment</feedback> for self-evaluation"""
+
+         print("Model loaded successfully!\n")
+
+     def parse_response(self, response):
+         """Extract tags from response"""
+         parts = {'thought': None, 'execute': None, 'solution': None, 'feedback': None}
+         for tag in parts:
+             match = re.search(f'<{tag}>(.*?)</{tag}>', response, re.DOTALL)
+             if match:
+                 parts[tag] = match.group(1).strip()
+         return parts
+
+     def build_prompt(self, user_input, execution_result=None):
+         """Build prompt with conversation history"""
+         messages = [{"role": "system", "content": self.system_prompt}]
+         messages.extend(self.conversation_history)
+
+         if execution_result:
+             content = f"Previous execution result: {execution_result}\n\nUser: {user_input}"
+         else:
+             content = user_input
+
+         messages.append({"role": "user", "content": content})
+
+         # Apply chat template
+         if hasattr(self.backend, 'tokenizer') and hasattr(self.backend.tokenizer, 'apply_chat_template'):
+             return self.backend.tokenizer.apply_chat_template(
+                 messages, tokenize=False, add_generation_prompt=True
+             )
+         else:
+             return "\n".join([f"{m['role']}: {m['content']}" for m in messages]) + "\nassistant:"
+
+     def chat(self, user_input, execution_result=None):
+         """Generate response"""
+         prompt = self.build_prompt(user_input, execution_result)
+         return self.backend.generate(prompt, max_tokens=400)
+
+     def run(self):
+         """Run interactive loop"""
+         print("="*60)
+         print(f"Running on: {self.backend_name.upper()}")
+         print("="*60)
+         print("\nCommands:")
+         print(" - Type your question and press Enter")
+         print(" - 'clear' - Clear conversation history")
+         print(" - 'quit' - Exit")
+         print("="*60 + "\n")
+
+         last_execution_result = None
+
+         while True:
+             try:
+                 user_input = input("\nYou: ").strip()
+
+                 if not user_input:
+                     continue
+
+                 if user_input.lower() in ['quit', 'exit', 'q']:
+                     print("\nGoodbye!")
+                     break
+
+                 if user_input.lower() == 'clear':
+                     self.conversation_history = []
+                     last_execution_result = None
+                     print("Conversation cleared")
+                     continue
+
+                 print("\n[Generating...]", end=" ", flush=True)
+                 response = self.chat(user_input, last_execution_result)
+                 print("Done!\n")
+
+                 parts = self.parse_response(response)
+
+                 if parts['thought']:
+                     print(f"Thought:\n{parts['thought']}\n")
+
+                 if parts['execute']:
+                     print(f"Code:\n```python\n{parts['execute']}\n```\n")
+                     print("Executing...\n")
+
+                     result = execute_code(parts['execute'])
+
+                     if result["success"]:
+                         if result["output"]:
+                             print(f"Output:\n{result['output']}")
+                             last_execution_result = f"Output: {result['output']}"
+
+                             print("\n" + "-"*40)
+                             feedback = input("Is this correct? (y/n/skip): ").strip().lower()
+
+                             if feedback == 'n':
+                                 print("\nMarked as incorrect")
+                                 last_execution_result += " [INCORRECT]"
+                             elif feedback == 'y':
+                                 print("\nCorrect!")
+                                 last_execution_result = None
+                             else:
+                                 last_execution_result = None
+
+                             self.conversation_history.append({"role": "user", "content": user_input})
+                             self.conversation_history.append({"role": "assistant", "content": response})
+                         else:
+                             print("Code executed (no output)")
+                             last_execution_result = None
+
+                         if result["error"]:
+                             print(f"Warnings: {result['error']}")
+                     else:
+                         print(f"Error: {result['error']}")
+                         last_execution_result = f"Error: {result['error']}"
+
+                 if parts['solution']:
+                     print(f"\nSolution:\n{parts['solution']}")
+
+                 if parts['feedback']:
+                     print(f"\nFeedback:\n{parts['feedback']}")
+
+                 if not any(parts.values()):
+                     print(f"Response:\n{response[:500]}")
+
+                 # Limit history
+                 if len(self.conversation_history) > 10:
+                     self.conversation_history = self.conversation_history[-10:]
+
+                 print("\n" + "="*60)
+
+             except KeyboardInterrupt:
+                 print("\n\nInterrupted. Goodbye!")
+                 break
+             except Exception as e:
+                 print(f"\nError: {e}")
+                 import traceback
+                 traceback.print_exc()
+
+ def main():
+     parser = argparse.ArgumentParser(description="CodeAct Interactive Demo")
+     parser.add_argument("--backend", choices=["auto", "cuda", "mps", "mlx", "cpu"],
+                         default="auto", help="Backend to use (default: auto)")
+     parser.add_argument("--model", type=str, default="Qwen/Qwen2.5-3B",
+                         help="Model name or path")
+     parser.add_argument("--adapter", type=str, default=None,
+                         help="Path to LoRA adapter")
+
+     args = parser.parse_args()
+
+     demo = CodeActDemo(
+         backend=args.backend,
+         model_name=args.model,
+         adapter_path=args.adapter
+     )
+     demo.run()
+
+ if __name__ == "__main__":
+     main()