Phoenix21 committed
Commit 29006c5 · verified · 1 Parent(s): 3be5f1a

Upload CodeAct fine-tuned model
0000100_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:63df8ee7078f69436fb178c94ed01c90246a89d965ce20d6d2bcefab04e63145
+ size 26631752
0000200_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c2b574d8ce4eb379916a840a4e4bd1294e4102acefaffbc7abf5615eb32153c5
+ size 26631752
0000300_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:264639b527b0f185c2b94f3445854b0cdbdfcb2e951b2f7e4da4566b790c491b
+ size 26631752
0000400_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:466e67a31b92b50a0034f72653021635d7c33d16ce357c69e7cbc028c74689aa
+ size 26631752
0000500_adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac831fc4e337596f97dbd44bf17bbcd49e61ffa48f1bef3bccfa3c2b083197b8
+ size 26631752
README.md ADDED
@@ -0,0 +1,104 @@
+ ---
+ license: apache-2.0
+ language:
+ - en
+ tags:
+ - code
+ - codeact
+ - python
+ - mlx
+ - lora
+ base_model: Qwen/Qwen2.5-3B
+ pipeline_tag: text-generation
+ ---
+
+ # CodeAct Fine-tuned Qwen2.5-3B
+
+ A fine-tuned version of Qwen2.5-3B for code generation with self-evaluation feedback.
+
+ ## Model Description
+
+ This model was fine-tuned using the CodeAct approach with:
+ - **Base Model:** Qwen/Qwen2.5-3B
+ - **Training Method:** LoRA (Low-Rank Adaptation)
+ - **Training Data:** 100 curated Python programming examples
+ - **Categories:** Math, Strings, Lists, Algorithms, Data Structures
+
+ ## Usage
+
+ ### With MLX (Apple Silicon)
+ ```python
+ from mlx_lm import load, generate
+
+ model, tokenizer = load("Phoenix21/codeact-qwen2.5-3b")
+ # Or with adapter:
+ # model, tokenizer = load("Qwen/Qwen2.5-3B", adapter_path="Phoenix21/codeact-qwen2.5-3b")
+
+ response = generate(model, tokenizer, prompt="Calculate factorial of 5", max_tokens=200)
+ print(response)
+ ```
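+
+ The numbered files in this repo (`0000100_adapters.safetensors` through `0000500_adapters.safetensors`) are intermediate LoRA checkpoints saved every 100 iterations (`save_every: 100` in `adapter_config.json`); `adapters.safetensors` is the final adapter and points to the same LFS object as the iteration-500 checkpoint. A rough, untested sketch for trying an earlier checkpoint, assuming `huggingface_hub` is installed and that `mlx_lm.load` reads `adapters.safetensors` from the adapter directory:
+
+ ```python
+ # Hypothetical: swap in the iteration-300 checkpoint before loading the adapter
+ import shutil
+ from huggingface_hub import snapshot_download
+ from mlx_lm import load, generate
+
+ repo_dir = snapshot_download("Phoenix21/codeact-qwen2.5-3b", local_dir="codeact-adapter")
+ shutil.copy(f"{repo_dir}/0000300_adapters.safetensors", f"{repo_dir}/adapters.safetensors")
+
+ model, tokenizer = load("Qwen/Qwen2.5-3B", adapter_path=repo_dir)
+ print(generate(model, tokenizer, prompt="Calculate factorial of 5", max_tokens=200))
+ ```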
+
+ ### With PyTorch (CUDA/CPU)
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftModel
+
+ base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-3B", trust_remote_code=True)
+ model = PeftModel.from_pretrained(base_model, "Phoenix21/codeact-qwen2.5-3b")
+ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B", trust_remote_code=True)
+ ```
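+
+ The snippet above only loads the adapter; generation then follows the usual `transformers` pattern. An illustrative sketch (runs on CPU by default; move the model and inputs to `"cuda"` first on a GPU):
+
+ ```python
+ inputs = tokenizer("Calculate factorial of 5", return_tensors="pt")
+ outputs = model.generate(**inputs, max_new_tokens=200)
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```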
+
+ ### Interactive Demo
+ ```bash
+ # Auto-detect backend (MLX/CUDA/CPU)
+ python interactive_universal.py
+
+ # Force specific backend
+ python interactive_universal.py --backend cuda
+ python interactive_universal.py --backend mlx
+ python interactive_universal.py --backend cpu
+ ```
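+
+ This upload does not include a requirements file, so the demo's dependencies are assumed: `mlx-lm` on Apple Silicon, or `torch`, `transformers`, and `peft` for the CUDA/MPS/CPU path.
+
+ ```bash
+ # Assumed dependencies for interactive_universal.py (not pinned in this repo)
+ pip install mlx-lm                    # Apple Silicon (MLX backend)
+ pip install torch transformers peft   # CUDA / MPS / CPU backends
+ ```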
+
+ ## Training Details
+
+ - **Iterations:** 500
+ - **Batch Size:** 1
+ - **LoRA Layers:** 16
+ - **Learning Rate:** 1e-5
+ - **Platform:** Apple M3 (MLX)
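+
+ The full configuration is in `adapter_config.json` in this repo. A roughly equivalent `mlx_lm.lora` invocation, with flag names assumed from current `mlx-lm` rather than taken from this upload, would be:
+
+ ```bash
+ python -m mlx_lm.lora \
+     --model Qwen/Qwen2.5-3B \
+     --train \
+     --data data/mlx_train \
+     --fine-tune-type lora \
+     --iters 500 \
+     --batch-size 1 \
+     --num-layers 16 \
+     --learning-rate 1e-5 \
+     --save-every 100 \
+     --adapter-path ./models/codeact-mlx-qwen2.5-3b
+ ```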
+
+ ## Response Format
+
+ The model uses structured tags:
+ - `<thought>reasoning</thought>` - Chain of thought
+ - `<execute>code</execute>` - Python code to execute
+ - `<solution>answer</solution>` - Final answer
+ - `<feedback>assessment</feedback>` - Self-evaluation
+
+ ## Example
+
+ **Input:** "Calculate the sum of squares from 1 to 10"
+
+ **Output:**
+ ```
+ <thought>Sum of squares formula: n(n+1)(2n+1)/6</thought>
+
+ <execute>
+ n = 10
+ result = n * (n + 1) * (2 * n + 1) // 6
+ print(result)
+ </execute>
+
+ <solution>Sum of squares from 1 to 10 is 385</solution>
+
+ <feedback>
+ score: 10
+ correctness: correct
+ efficiency: excellent
+ explanation: Used O(1) formula instead of O(n) loop
+ </feedback>
+ ```
+
+ ## License
+
+ Apache 2.0
adapter_config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "adapter_path": "./models/codeact-mlx-qwen2.5-3b",
+   "batch_size": 1,
+   "config": null,
+   "data": "data/mlx_train",
+   "fine_tune_type": "lora",
+   "grad_accumulation_steps": 1,
+   "grad_checkpoint": false,
+   "iters": 500,
+   "learning_rate": 1e-05,
+   "lora_parameters": {
+     "rank": 8,
+     "dropout": 0.0,
+     "scale": 20.0
+   },
+   "lr_schedule": null,
+   "mask_prompt": false,
+   "max_seq_length": 2048,
+   "model": "Qwen/Qwen2.5-3B",
+   "num_layers": 16,
+   "optimizer": "adam",
+   "optimizer_config": {
+     "adam": {},
+     "adamw": {},
+     "muon": {},
+     "sgd": {},
+     "adafactor": {}
+   },
+   "project_name": null,
+   "report_to": null,
+   "resume_adapter_file": null,
+   "save_every": 100,
+   "seed": 0,
+   "steps_per_eval": 200,
+   "steps_per_report": 10,
+   "test": false,
+   "test_batches": 500,
+   "train": true,
+   "val_batches": 25
+ }
adapters.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac831fc4e337596f97dbd44bf17bbcd49e61ffa48f1bef3bccfa3c2b083197b8
+ size 26631752
interactive_universal.py ADDED
@@ -0,0 +1,342 @@
+ """
+ Universal CodeAct Interactive Demo
+ Supports: CUDA (NVIDIA), MLX (Apple Silicon), CPU
+ Auto-detects best available backend
+ """
+ import re
+ import sys
+ import os
+ import argparse
+ from io import StringIO
+
+ # ============= BACKEND DETECTION =============
+ def detect_backend():
+     """Auto-detect the best available backend"""
+     # Check for MLX (Apple Silicon)
+     try:
+         import mlx.core as mx
+         return "mlx"
+     except ImportError:
+         pass
+
+     # Check for CUDA
+     try:
+         import torch
+         if torch.cuda.is_available():
+             return "cuda"
+     except ImportError:
+         pass
+
+     # Check for MPS (Apple Metal via PyTorch)
+     try:
+         import torch
+         if torch.backends.mps.is_available():
+             return "mps"
+     except:
+         pass
+
+     # Fallback to CPU
+     return "cpu"
+
+ # ============= MLX BACKEND =============
+ class MLXBackend:
+     def __init__(self, model_name, adapter_path=None):
+         from mlx_lm import load, generate
+         self.generate_fn = generate
+
+         if adapter_path and os.path.exists(adapter_path):
+             print(f"Loading MLX model with adapter: {adapter_path}")
+             self.model, self.tokenizer = load(model_name, adapter_path=adapter_path)
+         else:
+             print(f"Loading MLX model: {model_name}")
+             self.model, self.tokenizer = load(model_name)
+
+     def generate(self, prompt, max_tokens=400):
+         return self.generate_fn(
+             self.model,
+             self.tokenizer,
+             prompt=prompt,
+             max_tokens=max_tokens,
+             verbose=False
+         )
+
+ # ============= PYTORCH BACKEND (CUDA/MPS/CPU) =============
+ class PyTorchBackend:
+     def __init__(self, model_name, device="auto", adapter_path=None):
+         import torch
+         from transformers import AutoModelForCausalLM, AutoTokenizer
+
+         # Determine device
+         if device == "auto":
+             if torch.cuda.is_available():
+                 self.device = "cuda"
+             elif torch.backends.mps.is_available():
+                 self.device = "mps"
+             else:
+                 self.device = "cpu"
+         else:
+             self.device = device
+
+         print(f"Loading PyTorch model on {self.device}: {model_name}")
+
+         self.tokenizer = AutoTokenizer.from_pretrained(
+             model_name,
+             trust_remote_code=True
+         )
+
+         # Load model with appropriate dtype
+         dtype = torch.float16 if self.device in ["cuda", "mps"] else torch.float32
+
+         self.model = AutoModelForCausalLM.from_pretrained(
+             model_name,
+             torch_dtype=dtype,
+             device_map=self.device if self.device == "cuda" else None,
+             trust_remote_code=True,
+             low_cpu_mem_usage=True
+         )
+
+         if self.device != "cuda":
+             self.model = self.model.to(self.device)
+
+         # Load LoRA adapter if available
+         if adapter_path and os.path.exists(adapter_path):
+             try:
+                 from peft import PeftModel
+                 print(f"Loading LoRA adapter: {adapter_path}")
+                 self.model = PeftModel.from_pretrained(self.model, adapter_path)
+             except ImportError:
+                 print("Warning: peft not installed, skipping adapter")
+
+     def generate(self, prompt, max_tokens=400):
+         import torch
+
+         inputs = self.tokenizer(prompt, return_tensors="pt")
+         inputs = {k: v.to(self.device) for k, v in inputs.items()}
+
+         with torch.no_grad():
+             outputs = self.model.generate(
+                 **inputs,
+                 max_new_tokens=max_tokens,
+                 temperature=0.7,
+                 do_sample=True,
+                 top_p=0.95,
+                 pad_token_id=self.tokenizer.pad_token_id or self.tokenizer.eos_token_id
+             )
+
+         response = self.tokenizer.decode(
+             outputs[0][len(inputs['input_ids'][0]):],
+             skip_special_tokens=True
+         )
+         return response
+
+ # ============= CODE EXECUTION =============
+ def execute_code(code):
+     """Execute Python code and capture output"""
+     stdout_buffer = StringIO()
+     stderr_buffer = StringIO()
+     old_stdout, old_stderr = sys.stdout, sys.stderr
+
+     try:
+         sys.stdout = stdout_buffer
+         sys.stderr = stderr_buffer
+         namespace = {}
+         exec(code, namespace)
+         output = stdout_buffer.getvalue()
+         errors = stderr_buffer.getvalue()
+         return {"success": True, "output": output.strip() or None, "error": errors.strip() or None}
+     except Exception as e:
+         return {"success": False, "output": None, "error": str(e)}
+     finally:
+         sys.stdout, sys.stderr = old_stdout, old_stderr
+
+ # ============= MAIN DEMO CLASS =============
+ class CodeActDemo:
+     def __init__(self, backend="auto", model_name=None, adapter_path=None):
+         # Default model
+         if model_name is None:
+             model_name = "Qwen/Qwen2.5-3B"
+
+         # Default adapter paths
+         if adapter_path is None:
+             adapter_path = "./models/codeact-mlx-qwen2.5-3b"
+
+         # Auto-detect or use specified backend
+         if backend == "auto":
+             backend = detect_backend()
+
+         print(f"\n{'='*60}")
+         print(f"CodeAct Interactive Demo")
+         print(f"Backend: {backend.upper()}")
+         print(f"{'='*60}\n")
+
+         self.backend_name = backend
+
+         # Initialize backend
+         if backend == "mlx":
+             self.backend = MLXBackend(model_name, adapter_path)
+         else:
+             self.backend = PyTorchBackend(model_name, device=backend, adapter_path=adapter_path)
+
+         self.tokenizer = self.backend.tokenizer if hasattr(self.backend, 'tokenizer') else None
+         self.conversation_history = []
+
+         self.system_prompt = """You are a helpful AI assistant that executes Python code.
+ Use these tags:
+ - <thought>reasoning</thought> for thinking
+ - <execute>code</execute> for code
+ - <solution>answer</solution> for final answer
+ - <feedback>assessment</feedback> for self-evaluation"""
+
+         print("Model loaded successfully!\n")
+
+     def parse_response(self, response):
+         """Extract tags from response"""
+         parts = {'thought': None, 'execute': None, 'solution': None, 'feedback': None}
+         for tag in parts:
+             match = re.search(f'<{tag}>(.*?)</{tag}>', response, re.DOTALL)
+             if match:
+                 parts[tag] = match.group(1).strip()
+         return parts
+
+     def build_prompt(self, user_input, execution_result=None):
+         """Build prompt with conversation history"""
+         messages = [{"role": "system", "content": self.system_prompt}]
+         messages.extend(self.conversation_history)
+
+         if execution_result:
+             content = f"Previous execution result: {execution_result}\n\nUser: {user_input}"
+         else:
+             content = user_input
+
+         messages.append({"role": "user", "content": content})
+
+         # Apply chat template
+         if hasattr(self.backend, 'tokenizer') and hasattr(self.backend.tokenizer, 'apply_chat_template'):
+             return self.backend.tokenizer.apply_chat_template(
+                 messages, tokenize=False, add_generation_prompt=True
+             )
+         else:
+             return "\n".join([f"{m['role']}: {m['content']}" for m in messages]) + "\nassistant:"
+
+     def chat(self, user_input, execution_result=None):
+         """Generate response"""
+         prompt = self.build_prompt(user_input, execution_result)
+         return self.backend.generate(prompt, max_tokens=400)
+
+     def run(self):
+         """Run interactive loop"""
+         print("="*60)
+         print(f"Running on: {self.backend_name.upper()}")
+         print("="*60)
+         print("\nCommands:")
+         print(" - Type your question and press Enter")
+         print(" - 'clear' - Clear conversation history")
+         print(" - 'quit' - Exit")
+         print("="*60 + "\n")
+
+         last_execution_result = None
+
+         while True:
+             try:
+                 user_input = input("\nYou: ").strip()
+
+                 if not user_input:
+                     continue
+
+                 if user_input.lower() in ['quit', 'exit', 'q']:
+                     print("\nGoodbye!")
+                     break
+
+                 if user_input.lower() == 'clear':
+                     self.conversation_history = []
+                     last_execution_result = None
+                     print("Conversation cleared")
+                     continue
+
+                 print("\n[Generating...]", end=" ", flush=True)
+                 response = self.chat(user_input, last_execution_result)
+                 print("Done!\n")
+
+                 parts = self.parse_response(response)
+
+                 if parts['thought']:
+                     print(f"Thought:\n{parts['thought']}\n")
+
+                 if parts['execute']:
+                     print(f"Code:\n```python\n{parts['execute']}\n```\n")
+                     print("Executing...\n")
+
+                     result = execute_code(parts['execute'])
+
+                     if result["success"]:
+                         if result["output"]:
+                             print(f"Output:\n{result['output']}")
+                             last_execution_result = f"Output: {result['output']}"
+
+                             print("\n" + "-"*40)
+                             feedback = input("Is this correct? (y/n/skip): ").strip().lower()
+
+                             if feedback == 'n':
+                                 print("\nMarked as incorrect")
+                                 last_execution_result += " [INCORRECT]"
+                             elif feedback == 'y':
+                                 print("\nCorrect!")
+                                 last_execution_result = None
+                             else:
+                                 last_execution_result = None
+
+                             self.conversation_history.append({"role": "user", "content": user_input})
+                             self.conversation_history.append({"role": "assistant", "content": response})
+                         else:
+                             print("Code executed (no output)")
+                             last_execution_result = None
+
+                         if result["error"]:
+                             print(f"Warnings: {result['error']}")
+                     else:
+                         print(f"Error: {result['error']}")
+                         last_execution_result = f"Error: {result['error']}"
+
+                 if parts['solution']:
+                     print(f"\nSolution:\n{parts['solution']}")
+
+                 if parts['feedback']:
+                     print(f"\nFeedback:\n{parts['feedback']}")
+
+                 if not any(parts.values()):
+                     print(f"Response:\n{response[:500]}")
+
+                 # Limit history
+                 if len(self.conversation_history) > 10:
+                     self.conversation_history = self.conversation_history[-10:]
+
+                 print("\n" + "="*60)
+
+             except KeyboardInterrupt:
+                 print("\n\nInterrupted. Goodbye!")
+                 break
+             except Exception as e:
+                 print(f"\nError: {e}")
+                 import traceback
+                 traceback.print_exc()
+
+ def main():
+     parser = argparse.ArgumentParser(description="CodeAct Interactive Demo")
+     parser.add_argument("--backend", choices=["auto", "cuda", "mps", "mlx", "cpu"],
+                         default="auto", help="Backend to use (default: auto)")
+     parser.add_argument("--model", type=str, default="Qwen/Qwen2.5-3B",
+                         help="Model name or path")
+     parser.add_argument("--adapter", type=str, default=None,
+                         help="Path to LoRA adapter")
+
+     args = parser.parse_args()
+
+     demo = CodeActDemo(
+         backend=args.backend,
+         model_name=args.model,
+         adapter_path=args.adapter
+     )
+     demo.run()
+
+ if __name__ == "__main__":
+     main()