Commit: ec4aa90
1 Parent(s): 110a838

Initial deployment: Autonomous AI agent for code modernization
Files changed:

- .env.example +29 -0
- .gitignore +69 -0
- README.md +286 -5
- app.py +21 -0
- modal/api_test.py +59 -0
- pytest.ini +31 -0
- requirements.txt +39 -0
- src/__init__.py +3 -0
- src/agents/__init__.py +11 -0
- src/agents/analyzer.py +322 -0
- src/agents/classifier.py +119 -0
- src/agents/code_validator.py +346 -0
- src/agents/pattern_integration.py +296 -0
- src/agents/pattern_matcher.py +838 -0
- src/agents/test_generator.py +706 -0
- src/agents/transformer.py +358 -0
- src/config/__init__.py +10 -0
- src/config/ai_manager.py +323 -0
- src/config/gemini_config.py +99 -0
- src/config/gemini_schemas.py +261 -0
- src/mcp/__init__.py +9 -0
- src/mcp/github_client.py +407 -0
- src/mcp/manager.py +169 -0
- src/mcp/memory_client.py +202 -0
- src/mcp/search_client.py +247 -0
- src/sandbox/__init__.py +5 -0
- src/sandbox/config.py +124 -0
- src/sandbox/images.py +122 -0
- src/sandbox/modal_executor.py +423 -0
- src/sandbox/runners/__init__.py +32 -0
- src/sandbox/runners/java_runner.py +350 -0
- src/sandbox/runners/javascript_runner.py +318 -0
- src/sandbox/runners/python_runner.py +219 -0
- src/sandbox/validator.py +718 -0
- src/search/__init__.py +8 -0
- src/search/embeddings.py +350 -0
- src/search/vector_store.py +350 -0
- src/ui/__init__.py +1 -0
- src/ui/app.py +1045 -0
- src/utils/__init__.py +1 -0
- src/utils/file_handler.py +166 -0
- src/workflow/__init__.py +5 -0
- src/workflow/orchestrator.py +732 -0
.env.example
ADDED
@@ -0,0 +1,29 @@

# AI Provider Configuration
# ============================================
# Choose your AI provider: gemini or nebius
AI_PROVIDER=gemini

# Gemini API Configuration
GEMINI_API_KEY=your_gemini_api_key
# Optional: Change the Gemini model (default: gemini-2.5-flash)
# Other options: gemini-3-pro, gemini-2.5-pro, etc.
GEMINI_MODEL=gemini-2.5-flash

# Nebius Token Factory Configuration
NEBIUS_API_KEY=your_nebius_api_key
# Optional: Change the Nebius model (default: zai-org/GLM-4.5)
NEBIUS_MODEL=zai-org/GLM-4.5

# Modal Configuration
MODAL_TOKEN_ID=your_modal_token_id
MODAL_TOKEN_SECRET=your_modal_token_secret
MODAL_API_URL=your_modal_api_url

# GitHub Configuration for code fetching
GITHUB_TOKEN=your_github_personal_access_token

# Optional: Database Configuration
DATABASE_URL=sqlite:///./modernizer.db

# Tavily API Configuration (Optional)
TAVILY_API_KEY=your_tavily_api_key
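Illustrative sketch (not a file in this commit) of how an application might consume these variables to pick a provider. The variable names and defaults come straight from the file above; the real consumer is `src/config/ai_manager.py`, so the control flow here is an assumption:

```python
import os

from dotenv import load_dotenv  # python-dotenv, pinned in requirements.txt

load_dotenv()

provider = os.getenv("AI_PROVIDER", "gemini").lower()
if provider == "gemini":
    api_key = os.getenv("GEMINI_API_KEY")
    model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
elif provider == "nebius":
    api_key = os.getenv("NEBIUS_API_KEY")
    model = os.getenv("NEBIUS_MODEL", "zai-org/GLM-4.5")
else:
    raise ValueError(f"Unsupported AI_PROVIDER: {provider}")
```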
.gitignore
ADDED
@@ -0,0 +1,69 @@

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Virtual Environment
venv/
env/
ENV/
.venv

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# Testing
.pytest_cache/
.pattern_cache/
.coverage
htmlcov/
.tox/
.mypy_cache/
.dmypy.json
dmypy.json

# Environment variables
.env
.env.local

# Uploads and temporary files
uploads/
*.zip
*.tar.gz

# Output directories
modernized_output/
output/
temp/
tmp/

# OS
.DS_Store
Thumbs.db

# Logs
*.log

# Modal
.modal/
README.md
CHANGED
@@ -1,14 +1,295 @@

---
-title: Legacy Code Modernizer
-emoji:
+title: Legacy Code Modernizer - Autonomous AI Agent
+emoji: 🤖
colorFrom: purple
-colorTo:
+colorTo: blue
sdk: gradio
sdk_version: 6.0.1
app_file: app.py
pinned: false
license: apache-2.0
-short_description: AI
+short_description: Autonomous AI agent for code modernization with MCP tools
+tags:
+  - mcp-in-action-track-enterprise
+  - code-modernization
+  - autonomous-agent
+  - mcp
+  - gradio
---

# 🤖 Legacy Code Modernizer - Autonomous AI Agent

**Track 2: MCP in Action - Enterprise Applications**

An autonomous AI agent that modernizes legacy codebases through intelligent planning, reasoning, and execution using Model Context Protocol (MCP) tools.

## 🎯 Project Overview

Legacy Code Modernizer is a complete autonomous agent system that transforms outdated code into modern, secure, and maintainable software. The agent autonomously:

1. **Plans** - Analyzes codebases and creates modernization strategies
2. **Reasons** - Makes intelligent decisions about transformation priorities
3. **Executes** - Applies transformations, generates tests, and validates changes
4. **Integrates** - Creates GitHub PRs with comprehensive documentation

## 🏆 Why This Project Stands Out

### Autonomous Agent Capabilities

**Multi-Phase Planning & Reasoning:**
- **Phase 1**: Intelligent file discovery and classification using AI pattern detection
- **Phase 2**: Semantic code analysis with vector-based similarity search (LlamaIndex + Chroma)
- **Phase 3**: Deep pattern analysis using multiple AI models (Gemini, Nebius AI)
- **Phase 4**: Autonomous code transformation with context-aware reasoning
- **Phase 5**: Automated testing in isolated sandbox + GitHub PR creation

**Context Engineering & RAG:**
- Vector embeddings for semantic code search
- Pattern grouping across similar files
- Historical transformation caching via MCP Memory
- Real-time migration guide retrieval via MCP Search

### MCP Tools Integration

The agent uses **4 MCP servers** as autonomous tools:

1. **GitHub MCP** - Autonomous PR creation with comprehensive documentation
2. **Tavily Search MCP** - Real-time migration guide discovery
3. **Memory MCP** - Pattern analysis caching and learning
4. **Filesystem MCP** - Safe file operations (planned)

### Real-World Enterprise Value

- **Multi-language support**: Python, Java, JavaScript, TypeScript
- **Secure execution**: Modal sandbox with isolated test environments
- **Production-ready**: Comprehensive test generation with coverage reporting

## 🚀 Demo

### Video Demo
**[Demo video](https://drive.google.com/file/d/1ph0NK8QKXRStjydqBV9w6HJaViirswE2/view?usp=sharing)**

### Social Media Post
**X post link will be added here**

## 🎬 Quick Start

### Try It Live on Hugging Face Spaces

1. **Upload a code file** (Python, Java, JavaScript, TypeScript)
2. **Select target version** (auto-detected from your code)
3. **Click "Start Modernization"**
4. **Watch the autonomous agent work** through all 5 phases
5. **Download modernized code, tests, and reports**

### Local Installation

```bash
# Clone repository
git clone https://huggingface.co/spaces/MCP-1st-Birthday/legacy_code_modernizer
cd legacy_code_modernizer

# Set up environment variables
cp .env.example .env
# Edit .env with your API keys:
# - GEMINI_API_KEY (required)
# - GITHUB_TOKEN (for PR creation)
# - TAVILY_API_KEY (for search)
# - MODAL_TOKEN_ID & MODAL_TOKEN_SECRET (for sandbox)

# Set up a Python virtual environment
python -m venv venv
# On macOS / Linux:
source venv/bin/activate
# On Windows PowerShell:
.\venv\Scripts\Activate.ps1
# On Windows CMD:
venv\Scripts\activate.bat

# Install dependencies
pip install -r requirements.txt

# Run the Gradio app
python app.py
```

## 🧠 Autonomous Agent Architecture

### Planning Phase
```
Input: Legacy codebase
    ↓
Agent analyzes file structure and content
    ↓
Classifies files by modernization priority
    ↓
Creates transformation roadmap
```

### Reasoning Phase
```
Agent groups similar patterns using vector search
    ↓
Retrieves migration guides via Tavily MCP
    ↓
Checks cached analyses via Memory MCP
    ↓
Prioritizes transformations by risk/impact
```

### Execution Phase
```
Agent transforms code with AI models
    ↓
Generates comprehensive test suites
    ↓
Validates in isolated Modal sandbox
    ↓
Auto-fixes export/import issues
```

### Integration Phase
```
Agent creates GitHub branch via GitHub MCP
    ↓
Commits transformed files
    ↓
Generates PR with deployment checklist
    ↓
Adds rollback plan and test results
```

## 🛠️ Technical Stack

### AI & LLM
- **Google Gemini** - Primary reasoning engine with large context window
- **Nebius AI** - Alternative model for diverse perspectives
- **LlamaIndex** - RAG framework for semantic code search
- **Chroma** - Vector database for embeddings

### MCP Integration
- **mcp** (v1.22.0) - Model Context Protocol SDK
- **@modelcontextprotocol/server-github** - GitHub operations
- **@modelcontextprotocol/server-tavily** - Web search
- **@modelcontextprotocol/server-memory** - Persistent storage

### Execution & Testing
- **Modal** - Serverless sandbox for secure test execution
- **pytest/Jest/JUnit** - Language-specific test frameworks
- **Coverage.py/JaCoCo** - Code coverage analysis

### UI & Orchestration
- **Gradio 6.0** - Interactive web interface
- **LangGraph** - Agent workflow orchestration
- **asyncio** - Asynchronous execution

## 📊 Features Showcase

### 1. Intelligent Pattern Detection
```python
# Agent automatically detects legacy patterns:
# - Deprecated libraries (MySQLdb → PyMySQL)
# - Security vulnerabilities (SQL injection)
# - Python 2 syntax → Python 3
# - Missing type hints
# - Old-style string formatting
```

### 2. Semantic Code Search
```python
# Vector-based similarity search finds:
# - Files with similar legacy patterns
# - Related security vulnerabilities
# - Common refactoring opportunities
```

### 3. Autonomous Test Generation
```python
# Agent generates:
# - Unit tests with pytest/Jest/JUnit
# - Integration tests
# - Edge case coverage
# - Performance benchmarks
```

### 4. GitHub Integration via MCP
```python
# Automated PR includes:
# - Comprehensive change summary
# - Test results with coverage
# - Risk assessment
# - Deployment checklist
# - Rollback plan
```

## 🎯 Supported Languages & Versions

### Python
- **Versions**: 3.10, 3.11, 3.12, 3.13, 3.14
- **Frameworks**: Django 5.2 LTS, Flask 3.1, FastAPI 0.122
- **Testing**: pytest with coverage

### Java
- **Versions**: Java 17 LTS, 21 LTS, 23, 25 LTS
- **Frameworks**: Spring Boot 3.4, 4.0
- **Testing**: Maven + JUnit 5 + JaCoCo

### JavaScript
- **Standards**: ES2024, ES2025
- **Runtimes**: Node.js 22 LTS, 24 LTS, 25
- **Frameworks**: React 19, Angular 21, Vue 3.5, Express 5.1, Next.js 16
- **Testing**: Jest with coverage

### TypeScript
- **Versions**: 5.6, 5.7, 5.8, 5.9
- **Frameworks**: React 19, Angular 21, Next.js 16
- **Testing**: Jest with ts-jest

## 🔒 Security & Isolation

### Modal Sandbox Execution
- **Network isolation**: No external network access during tests
- **Filesystem isolation**: Temporary containers per execution
- **Resource limits**: CPU and memory constraints
- **Automatic cleanup**: Containers destroyed after execution

### Code Validation
- **Syntax checking**: Pre-execution validation
- **Import/export fixing**: Automatic resolution of module issues
- **Security scanning**: Detection of vulnerabilities
- **Type checking**: Language-specific validation

## 🎓 Advanced Features

### Context Engineering
- **Sliding window context**: Manages large files efficiently
- **Cross-file analysis**: Understands dependencies
- **Pattern learning**: Improves with usage via Memory MCP

### RAG Implementation
- **Semantic chunking**: Intelligent code splitting
- **Vector similarity**: Finds related patterns
- **Hybrid search**: Combines keyword + semantic search

### Agent Reasoning
- **Priority scoring**: Risk vs. impact analysis
- **Dependency tracking**: Understands file relationships

## 📝 License

Apache 2.0 - See LICENSE file for details

## 🙏 Acknowledgments

Built for **MCP's 1st Birthday Hackathon** hosted by Anthropic and Gradio.

**Powered by:**
- Google Gemini & Nebius AI
- Model Context Protocol (MCP)
- LlamaIndex & Chroma
- Modal
- Gradio

---

*Autonomous agents + MCP tools = The future of software development*
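Illustrative sketch (not a file in this commit) of the vector-similarity idea behind the "Semantic Code Search" feature above, using the Chroma client directly. The project's real implementation lives in `src/search/vector_store.py`; the collection name and snippets below are invented, and Chroma's default embedding model is downloaded on first use:

```python
import chromadb

client = chromadb.Client()  # in-memory instance
collection = client.create_collection("legacy_patterns")

# Index a few code snippets as documents; Chroma embeds them automatically.
collection.add(
    ids=["db.py", "auth.py"],
    documents=[
        "import MySQLdb\nconn = MySQLdb.connect(...)",
        'query = "SELECT * FROM users WHERE id = %s" % user_id',
    ],
)

# Retrieve the files most similar to a legacy-pattern description.
hits = collection.query(query_texts=["deprecated MySQLdb database access"], n_results=2)
print(hits["ids"][0])
```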
app.py
ADDED
@@ -0,0 +1,21 @@

"""
Entry point for HuggingFace Spaces
Redirects to the actual app in src/ui/app.py
"""

import sys
import os

# Add the project root to sys.path so the src package is importable
sys.path.insert(0, os.path.dirname(__file__))

# Import and run the actual app
from src.ui.app import app

if __name__ == "__main__":
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        show_error=True
    )
modal/api_test.py
ADDED
@@ -0,0 +1,59 @@

import requests
import json
import os
from dotenv import load_dotenv

load_dotenv()

# ---------------------------------------------------------
# Modal API URL is loaded from the .env file.
# Set MODAL_API_URL in your .env file.
# It usually looks like: https://your-username--text-embeddings-inference-api-text-embed-7389a1.modal.run
# ---------------------------------------------------------
API_URL = os.getenv("MODAL_API_URL", "").strip()

if not API_URL:
    raise ValueError("MODAL_API_URL not found in .env file. Please set it to your Modal endpoint URL.")


def test_embeddings():
    print(f"Testing API at: {API_URL}")

    # 1. Define the input text
    payload = {
        "inputs": [
            "Hello, this is a test sentence.",
            "Running text embeddings on Modal is fast."
        ]
    }

    response = None  # initialized so the except block can inspect it safely
    try:
        # 2. Send POST request
        response = requests.post(API_URL, json=payload)

        # 3. Check for errors
        response.raise_for_status()

        # 4. Parse the result
        data = response.json()

        # 5. Display results
        model_name = data.get("model", "Unknown")
        embeddings = data.get("embeddings", [])
        dims = data.get("dimensions", 0)

        print("\n--- Success! ---")
        print(f"Model used: {model_name}")
        print(f"Vector dimensions: {dims}")
        print(f"Number of texts embedded: {len(embeddings)}")

        # Print the first few numbers of the first embedding to verify
        if embeddings:
            print(f"\nFirst 5 values of first embedding:\n{embeddings[0][:5]}...")

    except requests.exceptions.RequestException as e:
        print(f"\nError calling API: {e}")
        if response is not None:
            print(f"Server response: {response.text}")


if __name__ == "__main__":
    test_embeddings()
pytest.ini
ADDED
@@ -0,0 +1,31 @@

[pytest]
testpaths = tests
python_files = test_*.py
python_classes = Test*
python_functions = test_*
addopts =
    -v
    --tb=short
    --strict-markers
    --cov=src
    --cov-report=html
    --cov-report=term-missing
markers =
    integration: Integration tests (deselect with '-m "not integration"')
    slow: Slow tests (deselect with '-m "not slow"')

# Note: coverage.py does not read pytest.ini; move the sections below to
# setup.cfg or tox.ini (same section names) for them to take effect.
[coverage:run]
source = src
omit =
    */tests/*
    */__pycache__/*
    */venv/*
    */env/*

[coverage:report]
precision = 2
show_missing = True
skip_covered = False

[coverage:html]
directory = htmlcov
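Illustrative sketch (not a file in this commit) of a test module that matches the discovery rules and registered markers above; the module name and assertions are invented:

```python
# tests/test_example.py -- matches python_files = test_*.py above.
import pytest


def test_addition():
    assert 1 + 1 == 2


@pytest.mark.integration  # registered above, so --strict-markers accepts it
def test_full_pipeline_placeholder():
    # Deselect with: pytest -m "not integration"
    assert True
```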
requirements.txt
ADDED
@@ -0,0 +1,39 @@

# AI & LLM
google-genai>=1.0.0
openai>=1.0.0
llama-index>=0.14.0
llama-index-llms-google-genai>=0.4.0
llama-index-llms-openai>=0.4.0
llama-index-embeddings-huggingface>=0.5.0

# Vector Store & Embeddings
chromadb>=1.3.0
llama-index-vector-stores-chroma>=0.4.0

# Agent Orchestration
langgraph>=1.0.0
langchain-core>=0.3.0

# Compute & Sandbox
modal>=1.2.0

# MCP Protocol
mcp>=1.22.0

# UI Framework
gradio>=6.0.0

# Database & ORM
sqlalchemy>=2.0.0
pymysql>=1.1.0

# Testing
pytest>=9.0.0
pytest-cov>=6.0.0
pytest-timeout>=2.3.0
pytest-asyncio>=0.24.0

# Utilities
python-dotenv>=1.0.0
pydantic>=2.10.0
transformers>=4.30.0  # For proper tokenization
src/__init__.py
ADDED
@@ -0,0 +1,3 @@

"""Legacy Code Modernizer Agent - AI-powered code modernization system."""

__version__ = "0.1.0"
src/agents/__init__.py
ADDED
@@ -0,0 +1,11 @@

"""Agent components for code analysis and transformation."""

from .classifier import CodeClassifier
from .analyzer import CodeAnalyzer
from .transformer import CodeTransformer
from .test_generator import CodeTestGenerator

# Keep backward compatibility
TestGenerator = CodeTestGenerator

__all__ = ['CodeClassifier', 'CodeAnalyzer', 'CodeTransformer', 'CodeTestGenerator', 'TestGenerator']
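The module-level alias above keeps pre-rename imports working. Illustrative check (not a file in this commit), assuming the package and its dependencies are installed:

```python
from src.agents import CodeTestGenerator, TestGenerator

# Both names resolve to the same class, so legacy call sites keep working.
assert TestGenerator is CodeTestGenerator
```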
src/agents/analyzer.py
ADDED
@@ -0,0 +1,322 @@

"""
Deep code analyzer using AI with RAG and MCP integration.
Supports multiple AI providers (Gemini, Nebius, OpenAI).
"""

import os
import json
import logging
from typing import Dict, List, Optional

from src.config import AIManager, GeminiSchemas

logger = logging.getLogger(__name__)


class CodeAnalyzer:
    """
    Deep analyzer for legacy code patterns using AI + RAG.
    Integrates with MCP servers for enhanced analysis.
    """

    def __init__(self, mcp_manager=None, search_engine=None):
        """
        Initialize Code Analyzer.

        Args:
            mcp_manager: Optional MCPManager instance
            search_engine: Optional CodeSearchEngine instance
        """
        self.mcp_manager = mcp_manager
        self.search_engine = search_engine

        # Use centralized AI manager
        self.ai_manager = AIManager()

        logger.info(
            f"CodeAnalyzer initialized with provider: {self.ai_manager.provider_name}, "
            f"model: {self.ai_manager.model_name}"
        )

    async def analyze_pattern(self, files: List[str], pattern_name: str,
                              file_contents: Dict[str, str]) -> Dict:
        """
        Deep analysis of legacy pattern with full context.

        Args:
            files: List of file paths to analyze
            pattern_name: Name of the pattern (e.g., "MySQLdb usage")
            file_contents: Dictionary mapping file paths to their contents

        Returns:
            Analysis result dictionary
        """
        logger.info(f"Analyzing pattern: {pattern_name} in {len(files)} files")

        # Check cache first (if MCP manager available)
        if self.mcp_manager:
            try:
                from src.mcp.memory_client import MemoryMCPClient
                memory_client = MemoryMCPClient(self.mcp_manager)

                pattern_id = self._generate_pattern_id(pattern_name, files)
                cached_analysis = await memory_client.retrieve_pattern_analysis(pattern_id)

                if cached_analysis:
                    logger.info(f"Using cached analysis for {pattern_name}")
                    return cached_analysis
            except Exception as e:
                logger.warning(f"Could not retrieve cached analysis: {e}")

        # Get context from search engine if available
        context = ""
        if self.search_engine:
            try:
                similar_files = self.search_engine.find_similar_patterns(
                    f"Files with {pattern_name}",
                    top_k=10
                )
                context = f"\n\nSimilar patterns found in: {', '.join([f['file_path'] for f in similar_files[:5]])}"
            except Exception as e:
                logger.warning(f"Could not get search context: {e}")

        # Get migration guides from Tavily if available
        migration_guides = ""
        if self.mcp_manager:
            try:
                from src.mcp.search_client import SearchMCPClient
                search_client = SearchMCPClient(self.mcp_manager)

                # Extract technologies from pattern name
                guides = await search_client.find_migration_guide(
                    from_tech=pattern_name.split()[0],
                    to_tech="modern alternative",
                    max_results=3
                )

                if guides:
                    migration_guides = "\n\nRelevant migration guides:\n"
                    for guide in guides:
                        migration_guides += f"- {guide['title']}: {guide['url']}\n"
            except Exception as e:
                logger.warning(f"Could not fetch migration guides: {e}")

        # Combine file contents
        code_samples = "\n\n".join([
            f"=== {file_path} ===\n{content[:1000]}..."  # Limit to first 1000 chars per file
            for file_path, content in list(file_contents.items())[:5]  # Limit to 5 files
        ])

        # Build analysis prompt
        prompt = f"""You are a senior software architect analyzing legacy code for modernization.

PATTERN TO ANALYZE: {pattern_name}

FILES AFFECTED: {', '.join(files)}

CODE SAMPLES:
{code_samples}

{context}
{migration_guides}

TASK: Provide a comprehensive analysis with:
1. **Current Implementation**: What the code currently does
2. **Issues**: Specific problems (security, performance, maintainability)
3. **Modern Recommendation**: Recommended library/pattern with version
4. **Migration Steps**: Detailed step-by-step migration plan
5. **Risk Assessment**: Potential risks and mitigation strategies
6. **Estimated Effort**: Time estimate for migration

Respond in JSON format with these exact keys:
{{
    "pattern": "{pattern_name}",
    "files": {json.dumps(files)},
    "analysis": "detailed analysis",
    "issues": ["issue1", "issue2", ...],
    "recommendation": "recommended approach",
    "steps": ["step1", "step2", ...],
    "risks": "risk assessment",
    "effort_hours": estimated_hours
}}
"""

        try:
            # Use JSON schema for guaranteed structure
            schema = GeminiSchemas.code_analysis()

            # Call AI with configured model
            response_text = self.ai_manager.generate_content(
                prompt=prompt,
                temperature=AIManager.TEMPERATURE_PRECISE,
                max_tokens=AIManager.MAX_OUTPUT_TOKENS_MEDIUM,
                response_format="json",
                response_schema=schema
            )

            # Parse JSON response
            analysis = json.loads(response_text)

            # Cache the analysis
            if self.mcp_manager:
                try:
                    from src.mcp.memory_client import MemoryMCPClient
                    memory_client = MemoryMCPClient(self.mcp_manager)
                    pattern_id = self._generate_pattern_id(pattern_name, files)
                    await memory_client.store_pattern_analysis(pattern_id, analysis)
                except Exception as e:
                    logger.warning(f"Could not cache analysis: {e}")

            logger.info(f"Analysis complete for {pattern_name}")
            return analysis

        except Exception as e:
            logger.error(f"Error during analysis: {e}")
            # Return fallback analysis
            return {
                "pattern": pattern_name,
                "files": files,
                "analysis": f"Error during analysis: {str(e)}",
                "issues": ["Analysis failed"],
                "recommendation": "Manual review required",
                "steps": ["Review error logs", "Retry analysis"],
                "risks": "High - analysis incomplete",
                "effort_hours": 0
            }

    def _generate_pattern_id(self, pattern_name: str, files: List[str]) -> str:
        """
        Generate unique ID for a pattern.

        Args:
            pattern_name: Name of the pattern
            files: List of files

        Returns:
            Unique pattern ID
        """
        import hashlib

        # Create hash from pattern name and sorted file list
        content = f"{pattern_name}:{'|'.join(sorted(files))}"
        return hashlib.md5(content.encode()).hexdigest()

    async def analyze_security_issues(self, file_path: str, code: str) -> Dict:
        """
        Analyze code for security vulnerabilities.

        Args:
            file_path: Path to the file
            code: Code content

        Returns:
            Security analysis result
        """
        logger.info(f"Analyzing security issues in {file_path}")

        prompt = f"""Analyze this code for security vulnerabilities:

FILE: {file_path}

CODE:
{code[:2000]}

Identify:
1. SQL injection risks
2. Hardcoded credentials
3. Insecure cryptography
4. Path traversal vulnerabilities
5. Command injection risks
6. Other security issues

Respond in JSON format:
{{
    "vulnerabilities": [
        {{
            "type": "vulnerability type",
            "severity": "critical|high|medium|low",
            "line_number": estimated_line,
            "description": "description",
            "recommendation": "how to fix"
        }}
    ],
    "security_score": 0-100
}}
"""

        try:
            response_text = self.ai_manager.generate_content(
                prompt=prompt,
                temperature=AIManager.TEMPERATURE_PRECISE,
                max_tokens=AIManager.MAX_OUTPUT_TOKENS_SMALL,
                response_format="json"
            )

            return json.loads(response_text)

        except Exception as e:
            logger.error(f"Error during security analysis: {e}")
            return {
                "vulnerabilities": [],
                "security_score": 0
            }

    async def suggest_refactoring(self, file_path: str, code: str) -> Dict:
        """
        Suggest code refactoring improvements.

        Args:
            file_path: Path to the file
            code: Code content

        Returns:
            Refactoring suggestions
        """
        logger.info(f"Suggesting refactoring for {file_path}")

        prompt = f"""Suggest refactoring improvements for this code:

FILE: {file_path}

CODE:
{code[:2000]}

Focus on:
1. Code duplication
2. Complex functions (high cyclomatic complexity)
3. Poor naming conventions
4. Missing error handling
5. Performance optimizations
6. Type hints and documentation

Respond in JSON format:
{{
    "suggestions": [
        {{
            "category": "category",
            "priority": "high|medium|low",
            "description": "what to improve",
            "benefit": "why improve it"
        }}
    ],
    "code_quality_score": 0-100
}}
"""

        try:
            response_text = self.ai_manager.generate_content(
                prompt=prompt,
                temperature=AIManager.TEMPERATURE_PRECISE,
                max_tokens=AIManager.MAX_OUTPUT_TOKENS_SMALL,
                response_format="json"
            )

            return json.loads(response_text)

        except Exception as e:
            logger.error(f"Error during refactoring analysis: {e}")
            return {
                "suggestions": [],
                "code_quality_score": 0
            }
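Illustrative sketch (not a file in this commit) of driving the analyzer above. The constructor and `analyze_pattern` signature come from this file; the sample path and contents are invented, the result keys follow the prompt's JSON contract, and a configured AI provider (see .env.example) is assumed:

```python
import asyncio

from src.agents.analyzer import CodeAnalyzer


async def main():
    # No MCP manager or search engine: cache and migration-guide
    # lookups are skipped and only the AI call runs.
    analyzer = CodeAnalyzer()
    result = await analyzer.analyze_pattern(
        files=["utils/db.py"],  # invented example file
        pattern_name="MySQLdb usage",
        file_contents={"utils/db.py": "import MySQLdb\nconn = MySQLdb.connect(host='...')"},
    )
    print(result["recommendation"], result["effort_hours"])  # keys per the JSON schema above


asyncio.run(main())
```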
src/agents/classifier.py
ADDED
@@ -0,0 +1,119 @@

"""Code classification using AI."""

import json
from typing import Dict, List
import os
from dotenv import load_dotenv

from src.config import AIManager, GeminiSchemas

load_dotenv()


class CodeClassifier:
    """Classifies code files into modernization categories using Gemini."""

    def __init__(self):
        """Initialize the classifier with AI client."""
        # Use centralized AI manager
        self.ai_manager = AIManager()

    def classify_files(self, file_list: List[str], batch_size: int = 25) -> Dict[str, str]:
        """
        Classify files using Gemini with few-shot prompting.

        Args:
            file_list: List of file paths to classify
            batch_size: Number of files to process per API call

        Returns:
            Dictionary mapping filenames to categories
        """
        all_results = {}

        # Process in batches to avoid token limits
        for i in range(0, len(file_list), batch_size):
            batch = file_list[i:i + batch_size]
            batch_results = self._classify_batch(batch)
            all_results.update(batch_results)

        return all_results

    def _classify_batch(self, file_list: List[str]) -> Dict[str, str]:
        """Classify a batch of files."""

        prompt = f"""You are a code modernization expert. Classify these files into categories.

CATEGORIES:
- modernize_high: Legacy patterns that need immediate update (Python 2, deprecated libs, security issues)
- modernize_low: Minor improvements needed (add type hints, optimize imports)
- skip: Already modern or non-code files

FEW-SHOT EXAMPLES:
1. utils/db.py (uses MySQLdb, string interpolation) → modernize_high
2. config.py (hardcoded credentials) → modernize_high
3. models/user.py (missing type hints) → modernize_low
4. src/api/UserController.java (uses deprecated Vector, no generics) → modernize_high
5. frontend/app.js (uses jQuery 1.x, inline event handlers) → modernize_high
6. legacy_php/login.php (mysql_connect, no prepared statements) → modernize_high
7. README.md → skip
8. tests/test_api.py (uses unittest, modern Python 3) → skip
9. package.json → skip
10. .gitignore → skip

FILES TO CLASSIFY:
{json.dumps(file_list, indent=2)}

Return JSON object with filename as key and category as value.
Example: {{"file1.py": "modernize_high", "file2.js": "skip"}}
"""

        try:
            # Use JSON schema for guaranteed structure
            schema = GeminiSchemas.file_classification()

            response_text = self.ai_manager.generate_content(
                prompt=prompt,
                temperature=AIManager.TEMPERATURE_PRECISE,
                max_tokens=AIManager.MAX_OUTPUT_TOKENS_MEDIUM,
                response_format="json",
                response_schema=schema
            )

            result = json.loads(response_text)

            # Validate results
            valid_categories = {"modernize_high", "modernize_low", "skip"}
            for filename, category in result.items():
                if category not in valid_categories:
                    result[filename] = "skip"  # Default to skip if invalid

            return result

        except Exception as e:
            print(f"Error classifying batch: {e}")
            # Return default classifications on error
            return {f: "skip" for f in file_list}

    def get_statistics(self, classifications: Dict[str, str]) -> Dict[str, int]:
        """
        Get statistics about classifications.

        Args:
            classifications: Dictionary of file classifications

        Returns:
            Dictionary with counts per category
        """
        stats = {
            "modernize_high": 0,
            "modernize_low": 0,
            "skip": 0,
            "total": len(classifications)
        }

        for category in classifications.values():
            if category in stats:
                stats[category] += 1

        return stats
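Illustrative sketch (not a file in this commit) of using the classifier above; the file paths are invented and a configured AI provider (see .env.example) is assumed:

```python
from src.agents.classifier import CodeClassifier

classifier = CodeClassifier()
labels = classifier.classify_files(["utils/db.py", "models/user.py", "README.md"])
print(labels)                             # e.g. {"utils/db.py": "modernize_high", ...}
print(classifier.get_statistics(labels))  # per-category counts plus "total"
```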
src/agents/code_validator.py
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Code Validator - Validates generated code for common issues.
|
| 3 |
+
Catches problems before they reach the sandbox execution phase.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import re
|
| 7 |
+
import logging
|
| 8 |
+
from typing import Dict, List, Tuple
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class CodeValidator:
|
| 14 |
+
"""Validates generated code for common issues and inconsistencies."""
|
| 15 |
+
|
| 16 |
+
@staticmethod
|
| 17 |
+
def validate_typescript_module_system(source_code: str) -> Tuple[bool, List[str]]:
|
| 18 |
+
"""
|
| 19 |
+
Validate that TypeScript code is compatible with Jest/ts-jest (CommonJS).
|
| 20 |
+
|
| 21 |
+
Args:
|
| 22 |
+
source_code: TypeScript source code
|
| 23 |
+
|
| 24 |
+
Returns:
|
| 25 |
+
(is_valid, list_of_issues)
|
| 26 |
+
"""
|
| 27 |
+
issues = []
|
| 28 |
+
|
| 29 |
+
# Check for ES module-only features that break Jest/ts-jest
|
| 30 |
+
if 'import.meta' in source_code:
|
| 31 |
+
issues.append(
|
| 32 |
+
"Code uses 'import.meta' which requires ES modules. "
|
| 33 |
+
"Jest/ts-jest uses CommonJS. Remove import.meta usage."
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
if re.search(r'\btop-level\s+await\b', source_code) or re.search(r'^await\s+', source_code, re.MULTILINE):
|
| 37 |
+
issues.append(
|
| 38 |
+
"Code uses top-level await which requires ES modules. "
|
| 39 |
+
"Jest/ts-jest uses CommonJS. Wrap in async function."
|
| 40 |
+
)
|
| 41 |
+
|
| 42 |
+
# Check for CLI execution patterns that shouldn't be in library code
|
| 43 |
+
if 'process.argv[1]' in source_code or 'if (require.main === module)' in source_code:
|
| 44 |
+
issues.append(
|
| 45 |
+
"Code includes CLI execution logic. "
|
| 46 |
+
"Library code should not include main execution blocks."
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
return len(issues) == 0, issues
|
| 50 |
+
|
| 51 |
+
@staticmethod
|
| 52 |
+
def validate_typescript_exports(source_code: str, test_code: str) -> Tuple[bool, List[str]]:
|
| 53 |
+
"""
|
| 54 |
+
Validate that all TypeScript types/enums/interfaces imported in tests are exported in source.
|
| 55 |
+
|
| 56 |
+
Args:
|
| 57 |
+
source_code: TypeScript source code
|
| 58 |
+
test_code: TypeScript test code
|
| 59 |
+
|
| 60 |
+
Returns:
|
| 61 |
+
(is_valid, list_of_issues)
|
| 62 |
+
"""
|
| 63 |
+
issues = []
|
| 64 |
+
|
| 65 |
+
# Extract imports from test code
|
| 66 |
+
import_pattern = r'import\s+\{([^}]+)\}\s+from\s+["\']\./'
|
| 67 |
+
test_imports = re.findall(import_pattern, test_code)
|
| 68 |
+
|
| 69 |
+
if not test_imports:
|
| 70 |
+
return True, []
|
| 71 |
+
|
| 72 |
+
# Get all imported names
|
| 73 |
+
imported_names = set()
|
| 74 |
+
for import_group in test_imports:
|
| 75 |
+
names = [name.strip() for name in import_group.split(',')]
|
| 76 |
+
imported_names.update(names)
|
| 77 |
+
|
| 78 |
+
# Check if each imported name is exported in source
|
| 79 |
+
for name in imported_names:
|
| 80 |
+
# Check for export function/class/enum/interface/type
|
| 81 |
+
export_patterns = [
|
| 82 |
+
rf'export\s+(function|class|enum|interface|type)\s+{name}\b',
|
| 83 |
+
rf'export\s+\{{\s*[^}}]*\b{name}\b[^}}]*\}}',
|
| 84 |
+
rf'export\s+const\s+{name}\s*=',
|
| 85 |
+
]
|
| 86 |
+
|
| 87 |
+
is_exported = any(re.search(pattern, source_code) for pattern in export_patterns)
|
| 88 |
+
|
| 89 |
+
if not is_exported:
|
| 90 |
+
# Check if it's declared but not exported
|
| 91 |
+
declaration_patterns = [
|
| 92 |
+
rf'\b(function|class|enum|interface|type)\s+{name}\b',
|
| 93 |
+
rf'\bconst\s+{name}\s*=',
|
| 94 |
+
]
|
| 95 |
+
is_declared = any(re.search(pattern, source_code) for pattern in declaration_patterns)
|
| 96 |
+
|
| 97 |
+
if is_declared:
|
| 98 |
+
issues.append(
|
| 99 |
+
f"'{name}' is declared in source but not exported. "
|
| 100 |
+
f"Add 'export' keyword before the declaration."
|
| 101 |
+
)
|
| 102 |
+
else:
|
| 103 |
+
issues.append(
|
| 104 |
+
f"'{name}' is imported in tests but not found in source code."
|
| 105 |
+
)
|
| 106 |
+
|
| 107 |
+
return len(issues) == 0, issues
|
| 108 |
+
|
| 109 |
+
@staticmethod
|
| 110 |
+
def validate_javascript_exports(source_code: str, test_code: str) -> Tuple[bool, List[str]]:
|
| 111 |
+
"""
|
| 112 |
+
Validate that all JavaScript functions/classes imported in tests are exported in source.
|
| 113 |
+
|
| 114 |
+
Args:
|
| 115 |
+
source_code: JavaScript source code
|
| 116 |
+
test_code: JavaScript test code
|
| 117 |
+
|
| 118 |
+
Returns:
|
| 119 |
+
(is_valid, list_of_issues)
|
| 120 |
+
"""
|
| 121 |
+
issues = []
|
| 122 |
+
|
| 123 |
+
# Extract imports from test code (ES6 imports)
|
| 124 |
+
import_pattern = r'import\s+\{([^}]+)\}\s+from\s+["\']\./'
|
| 125 |
+
test_imports = re.findall(import_pattern, test_code)
|
| 126 |
+
|
| 127 |
+
if not test_imports:
|
| 128 |
+
return True, []
|
| 129 |
+
|
| 130 |
+
# Get all imported names
|
| 131 |
+
imported_names = set()
|
| 132 |
+
for import_group in test_imports:
|
| 133 |
+
names = [name.strip() for name in import_group.split(',')]
|
| 134 |
+
imported_names.update(names)
|
| 135 |
+
|
| 136 |
+
# Check if each imported name is exported in source
|
| 137 |
+
for name in imported_names:
|
| 138 |
+
# Check for various export patterns
|
| 139 |
+
export_patterns = [
|
| 140 |
+
rf'export\s+(function|class|const|let|var)\s+{name}\b',
|
| 141 |
+
rf'export\s+\{{\s*[^}}]*\b{name}\b[^}}]*\}}',
|
| 142 |
+
rf'module\.exports\s*=\s*\{{[^}}]*\b{name}\b[^}}]*\}}',
|
| 143 |
+
rf'exports\.{name}\s*=',
|
| 144 |
+
]
|
| 145 |
+
|
| 146 |
+
is_exported = any(re.search(pattern, source_code) for pattern in export_patterns)
|
| 147 |
+
|
| 148 |
+
if not is_exported:
|
| 149 |
+
issues.append(
|
| 150 |
+
f"'{name}' is imported in tests but not exported in source. "
|
| 151 |
+
f"Add it to the export statement."
|
| 152 |
+
)
|
| 153 |
+
|
| 154 |
+
return len(issues) == 0, issues
|
| 155 |
+
|
| 156 |
+
@staticmethod
|
| 157 |
+
def validate_python_imports(source_code: str, test_code: str) -> Tuple[bool, List[str]]:
|
| 158 |
+
"""
|
| 159 |
+
Validate that all Python functions/classes imported in tests exist in source.
|
| 160 |
+
|
| 161 |
+
Args:
|
| 162 |
+
source_code: Python source code
|
| 163 |
+
test_code: Python test code
|
| 164 |
+
|
| 165 |
+
Returns:
|
| 166 |
+
(is_valid, list_of_issues)
|
| 167 |
+
"""
|
| 168 |
+
issues = []
|
| 169 |
+
|
| 170 |
+
# Extract imports from test code
|
| 171 |
+
import_patterns = [
|
| 172 |
+
r'from\s+\w+\s+import\s+([^#\n]+)',
|
| 173 |
+
r'import\s+(\w+)',
|
| 174 |
+
]
|
| 175 |
+
|
| 176 |
+
imported_names = set()
|
| 177 |
+
for pattern in import_patterns:
|
| 178 |
+
matches = re.findall(pattern, test_code)
|
| 179 |
+
for match in matches:
|
| 180 |
+
names = [name.strip() for name in match.split(',')]
|
| 181 |
+
imported_names.update(names)
|
| 182 |
+
|
| 183 |
+
# Check if each imported name is defined in source
|
| 184 |
+
for name in imported_names:
|
| 185 |
+
# Check for function/class definitions
|
| 186 |
+
definition_patterns = [
|
| 187 |
+
rf'^def\s+{name}\s*\(',
|
| 188 |
+
rf'^class\s+{name}\b',
|
| 189 |
+
rf'^{name}\s*=',
|
| 190 |
+
]
|
| 191 |
+
|
| 192 |
+
is_defined = any(re.search(pattern, source_code, re.MULTILINE) for pattern in definition_patterns)
|
| 193 |
+
|
| 194 |
+
if not is_defined:
|
| 195 |
+
issues.append(
|
| 196 |
+
f"'{name}' is imported in tests but not defined in source code."
|
| 197 |
+
)
|
| 198 |
+
|
| 199 |
+
return len(issues) == 0, issues
|
| 200 |
+
|
| 201 |
+
    @staticmethod
    def validate_code(source_code: str, test_code: str, language: str) -> Tuple[bool, List[str]]:
        """
        Validate code based on language.

        Args:
            source_code: Source code
            test_code: Test code
            language: Programming language

        Returns:
            (is_valid, list_of_issues)
        """
        language = language.lower()
        all_issues = []

        if language == 'typescript':
            # Check module system compatibility
            is_valid_module, module_issues = CodeValidator.validate_typescript_module_system(source_code)
            all_issues.extend(module_issues)

            # Check exports
            is_valid_exports, export_issues = CodeValidator.validate_typescript_exports(source_code, test_code)
            all_issues.extend(export_issues)

            return len(all_issues) == 0, all_issues
        elif language == 'javascript':
            return CodeValidator.validate_javascript_exports(source_code, test_code)
        elif language == 'python':
            return CodeValidator.validate_python_imports(source_code, test_code)
        else:
            # No validation for other languages yet
            return True, []

    @staticmethod
    def auto_fix_typescript_module_system(source_code: str) -> str:
        """
        Remove ES module-only features that break Jest/ts-jest.

        Args:
            source_code: TypeScript source code

        Returns:
            Fixed source code
        """
        fixed_code = source_code

        # Remove import.meta usage and related code
        if 'import.meta' in fixed_code:
            # Remove the entire CLI execution block that uses import.meta
            # Pattern: from import statement to the end of the if block
            pattern = r'\n// Modern ES module.*?\n.*?import.*?from [\'"]url[\'"];.*?\n.*?import.*?from [\'"]path[\'"];.*?\n\nconst __filename.*?import\.meta\.url\);.*?\n.*?if \(process\.argv\[1\].*?\{.*?\n.*?\n.*?\n\}'
            fixed_code = re.sub(pattern, '', fixed_code, flags=re.DOTALL)

            # Fallback: remove just the import.meta line
            if 'import.meta' in fixed_code:
                fixed_code = re.sub(r'.*import\.meta.*\n', '', fixed_code)

            logger.info("Auto-fixed: Removed import.meta usage")

        # Remove CLI execution patterns
        if 'process.argv[1]' in fixed_code:
            # Remove if (process.argv[1] === __filename) blocks
            pattern = r'\nif \(process\.argv\[1\].*?\{[^}]*\}'
            fixed_code = re.sub(pattern, '', fixed_code, flags=re.DOTALL)
            logger.info("Auto-fixed: Removed CLI execution block")

        return fixed_code

    @staticmethod
    def auto_fix_typescript_exports(source_code: str, missing_exports: List[str]) -> str:
        """
        Automatically add export keywords to TypeScript declarations.

        Args:
            source_code: TypeScript source code
            missing_exports: List of names that need to be exported

        Returns:
            Fixed source code
        """
        fixed_code = source_code

        for name in missing_exports:
            # Try to add export keyword before declaration
            patterns = [
                (rf'(\n)(enum\s+{name}\b)', r'\1export \2'),
                (rf'(\n)(interface\s+{name}\b)', r'\1export \2'),
                (rf'(\n)(type\s+{name}\b)', r'\1export \2'),
                (rf'(\n)(class\s+{name}\b)', r'\1export \2'),
                (rf'(\n)(function\s+{name}\b)', r'\1export \2'),
                (rf'(\n)(const\s+{name}\s*=)', r'\1export \2'),
            ]

            for pattern, replacement in patterns:
                new_code = re.sub(pattern, replacement, fixed_code)
                if new_code != fixed_code:
                    logger.info(f"Auto-fixed: Added 'export' to '{name}'")
                    fixed_code = new_code
                    break

        return fixed_code


def validate_and_fix_code(source_code: str, test_code: str, language: str) -> Tuple[str, bool, List[str]]:
    """
    Validate code and attempt to auto-fix common issues.

    Args:
        source_code: Source code
        test_code: Test code
        language: Programming language

    Returns:
        (fixed_source_code, is_valid, list_of_remaining_issues)
    """
    validator = CodeValidator()
    is_valid, issues = validator.validate_code(source_code, test_code, language)

    if not is_valid and language.lower() == 'typescript':
        fixed_code = source_code

        # Auto-fix module system issues (import.meta, etc.)
        module_issues = [issue for issue in issues if 'import.meta' in issue or 'top-level await' in issue or 'CLI execution' in issue]
        if module_issues:
            logger.info(f"Attempting to auto-fix {len(module_issues)} module system issues")
            fixed_code = validator.auto_fix_typescript_module_system(fixed_code)

        # Auto-fix export issues
        missing_names = []
        for issue in issues:
            # Extract name from issue message
            match = re.search(r"'(\w+)'", issue)
            if match and "not exported" in issue:
                missing_names.append(match.group(1))

        if missing_names:
            logger.info(f"Attempting to auto-fix {len(missing_names)} export issues")
            fixed_code = validator.auto_fix_typescript_exports(fixed_code, missing_names)

        # Re-validate if we made any fixes
        if fixed_code != source_code:
            is_valid, issues = validator.validate_code(fixed_code, test_code, language)
            return fixed_code, is_valid, issues

    return source_code, is_valid, issues
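A minimal usage sketch for the validator above. The TypeScript source and test strings are hypothetical, and the import path is assumed from the src/agents layout used elsewhere in this commit; validate_and_fix_code itself is defined in the file above.

# Hypothetical usage sketch; the snippet strings below are placeholders.
from src.agents.code_validator import validate_and_fix_code

source = "interface Config { retries: number }\nconst load = () => ({ retries: 3 });\n"
tests = "import { load, Config } from './config';\n// ... Jest assertions ...\n"

fixed_source, is_valid, issues = validate_and_fix_code(source, tests, "typescript")
# fixed_source may gain 'export' keywords; issues lists anything auto-fixing could not resolve
print(is_valid, issues)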
src/agents/pattern_integration.py
ADDED
@@ -0,0 +1,296 @@
"""
|
| 2 |
+
Integration layer for the new IntelligentPatternMatcher with existing workflow.
|
| 3 |
+
Provides backward compatibility while enabling advanced pattern detection.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import logging
|
| 7 |
+
from typing import Dict, List, Optional
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
from .pattern_matcher import (
|
| 11 |
+
IntelligentPatternMatcher,
|
| 12 |
+
FileAnalysis,
|
| 13 |
+
PatternSeverity
|
| 14 |
+
)
|
| 15 |
+
from .classifier import CodeClassifier
|
| 16 |
+
|
| 17 |
+
logger = logging.getLogger(__name__)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class PatternMatcherIntegration:
|
| 21 |
+
"""
|
| 22 |
+
Integrates IntelligentPatternMatcher with existing workflow.
|
| 23 |
+
Provides compatibility layer for gradual migration.
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
def __init__(self, use_intelligent_matcher: bool = True, cache_dir: Optional[str] = None):
|
| 27 |
+
"""
|
| 28 |
+
Initialize integration layer.
|
| 29 |
+
|
| 30 |
+
Args:
|
| 31 |
+
use_intelligent_matcher: If True, use new AI-powered matcher
|
| 32 |
+
cache_dir: Optional cache directory for pattern analysis
|
| 33 |
+
"""
|
| 34 |
+
self.use_intelligent_matcher = use_intelligent_matcher
|
| 35 |
+
|
| 36 |
+
if use_intelligent_matcher:
|
| 37 |
+
self.pattern_matcher = IntelligentPatternMatcher(cache_dir=cache_dir)
|
| 38 |
+
logger.info("Using IntelligentPatternMatcher")
|
| 39 |
+
else:
|
| 40 |
+
self.classifier = CodeClassifier()
|
| 41 |
+
logger.info("Using legacy CodeClassifier")
|
| 42 |
+
|
| 43 |
+
def classify_files(self, files: List[str], file_contents: Optional[Dict[str, str]] = None) -> Dict[str, str]:
|
| 44 |
+
"""
|
| 45 |
+
Classify files using either intelligent matcher or legacy classifier.
|
| 46 |
+
|
| 47 |
+
Args:
|
| 48 |
+
files: List of file paths
|
| 49 |
+
file_contents: Optional dict of file contents (required for intelligent matcher)
|
| 50 |
+
|
| 51 |
+
Returns:
|
| 52 |
+
Dictionary mapping filenames to categories
|
| 53 |
+
Categories: 'modernize_high', 'modernize_low', 'skip'
|
| 54 |
+
"""
|
| 55 |
+
if self.use_intelligent_matcher:
|
| 56 |
+
return self._classify_with_intelligent_matcher(files, file_contents)
|
| 57 |
+
else:
|
| 58 |
+
return self.classifier.classify_files(files)
|
| 59 |
+
|
| 60 |
+
def _classify_with_intelligent_matcher(
|
| 61 |
+
self,
|
| 62 |
+
files: List[str],
|
| 63 |
+
file_contents: Optional[Dict[str, str]]
|
| 64 |
+
) -> Dict[str, str]:
|
| 65 |
+
"""
|
| 66 |
+
Classify files using intelligent pattern matcher.
|
| 67 |
+
|
| 68 |
+
Args:
|
| 69 |
+
files: List of file paths
|
| 70 |
+
file_contents: Dictionary of file contents
|
| 71 |
+
|
| 72 |
+
Returns:
|
| 73 |
+
Dictionary mapping filenames to categories
|
| 74 |
+
"""
|
| 75 |
+
if not file_contents:
|
| 76 |
+
logger.warning("No file contents provided, falling back to legacy classifier")
|
| 77 |
+
return self.classifier.classify_files(files)
|
| 78 |
+
|
| 79 |
+
classifications = {}
|
| 80 |
+
|
| 81 |
+
# Analyze files
|
| 82 |
+
analyses = self.pattern_matcher.analyze_batch(file_contents)
|
| 83 |
+
|
| 84 |
+
# Convert analyses to legacy classification format
|
| 85 |
+
for file_path, analysis in analyses.items():
|
| 86 |
+
category = self._analysis_to_category(analysis)
|
| 87 |
+
classifications[file_path] = category
|
| 88 |
+
|
| 89 |
+
return classifications
|
| 90 |
+
|
| 91 |
+
def _analysis_to_category(self, analysis: FileAnalysis) -> str:
|
| 92 |
+
"""
|
| 93 |
+
Convert FileAnalysis to legacy category format.
|
| 94 |
+
|
| 95 |
+
Args:
|
| 96 |
+
analysis: FileAnalysis object
|
| 97 |
+
|
| 98 |
+
Returns:
|
| 99 |
+
Category string: 'modernize_high', 'modernize_low', or 'skip'
|
| 100 |
+
"""
|
| 101 |
+
if not analysis.requires_modernization:
|
| 102 |
+
return 'skip'
|
| 103 |
+
|
| 104 |
+
# Check for critical or high severity patterns
|
| 105 |
+
has_critical = any(
|
| 106 |
+
p.severity == PatternSeverity.CRITICAL
|
| 107 |
+
for p in analysis.patterns
|
| 108 |
+
)
|
| 109 |
+
has_high = any(
|
| 110 |
+
p.severity == PatternSeverity.HIGH
|
| 111 |
+
for p in analysis.patterns
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
# Check modernization score
|
| 115 |
+
if has_critical or analysis.modernization_score < 50:
|
| 116 |
+
return 'modernize_high'
|
| 117 |
+
elif has_high or analysis.modernization_score < 75:
|
| 118 |
+
return 'modernize_high'
|
| 119 |
+
elif analysis.requires_modernization:
|
| 120 |
+
return 'modernize_low'
|
| 121 |
+
else:
|
| 122 |
+
return 'skip'
|
| 123 |
+
|
| 124 |
+
def get_detailed_analysis(self, file_path: str, code: str) -> FileAnalysis:
|
| 125 |
+
"""
|
| 126 |
+
Get detailed pattern analysis for a single file.
|
| 127 |
+
|
| 128 |
+
Args:
|
| 129 |
+
file_path: Path to the file
|
| 130 |
+
code: File contents
|
| 131 |
+
|
| 132 |
+
Returns:
|
| 133 |
+
FileAnalysis object with detailed pattern information
|
| 134 |
+
"""
|
| 135 |
+
if not self.use_intelligent_matcher:
|
| 136 |
+
raise ValueError("Detailed analysis requires intelligent matcher")
|
| 137 |
+
|
| 138 |
+
return self.pattern_matcher.analyze_file(file_path, code)
|
| 139 |
+
|
| 140 |
+
def get_transformation_plan(self, analysis: FileAnalysis) -> Dict:
|
| 141 |
+
"""
|
| 142 |
+
Convert FileAnalysis to transformation plan format.
|
| 143 |
+
|
| 144 |
+
Args:
|
| 145 |
+
analysis: FileAnalysis object
|
| 146 |
+
|
| 147 |
+
Returns:
|
| 148 |
+
Transformation plan dictionary compatible with CodeTransformer
|
| 149 |
+
"""
|
| 150 |
+
# Group patterns by type
|
| 151 |
+
pattern_groups = {}
|
| 152 |
+
for pattern in analysis.patterns:
|
| 153 |
+
if pattern.pattern_type not in pattern_groups:
|
| 154 |
+
pattern_groups[pattern.pattern_type] = []
|
| 155 |
+
pattern_groups[pattern.pattern_type].append(pattern)
|
| 156 |
+
|
| 157 |
+
# Build transformation steps
|
| 158 |
+
steps = []
|
| 159 |
+
total_effort = 0
|
| 160 |
+
|
| 161 |
+
for pattern_type, patterns in pattern_groups.items():
|
| 162 |
+
# Get highest severity pattern for this type
|
| 163 |
+
highest_severity = max(patterns, key=lambda p: self._severity_to_int(p.severity))
|
| 164 |
+
|
| 165 |
+
steps.append({
|
| 166 |
+
'pattern': pattern_type,
|
| 167 |
+
'severity': highest_severity.severity.value,
|
| 168 |
+
'description': highest_severity.description,
|
| 169 |
+
'recommendation': highest_severity.recommendation,
|
| 170 |
+
'line_numbers': highest_severity.line_numbers,
|
| 171 |
+
'confidence': highest_severity.confidence
|
| 172 |
+
})
|
| 173 |
+
|
| 174 |
+
total_effort += highest_severity.estimated_effort_hours
|
| 175 |
+
|
| 176 |
+
return {
|
| 177 |
+
'file_path': analysis.file_path,
|
| 178 |
+
'language': analysis.language,
|
| 179 |
+
'framework': analysis.framework,
|
| 180 |
+
'pattern': f"{analysis.language} modernization",
|
| 181 |
+
'steps': steps,
|
| 182 |
+
'estimated_effort_hours': total_effort,
|
| 183 |
+
'priority': analysis.overall_priority.value,
|
| 184 |
+
'modernization_score': analysis.modernization_score
|
| 185 |
+
}
|
| 186 |
+
|
| 187 |
+
def _severity_to_int(self, severity: PatternSeverity) -> int:
|
| 188 |
+
"""Convert severity to integer for comparison."""
|
| 189 |
+
severity_map = {
|
| 190 |
+
PatternSeverity.CRITICAL: 5,
|
| 191 |
+
PatternSeverity.HIGH: 4,
|
| 192 |
+
PatternSeverity.MEDIUM: 3,
|
| 193 |
+
PatternSeverity.LOW: 2,
|
| 194 |
+
PatternSeverity.INFO: 1
|
| 195 |
+
}
|
| 196 |
+
return severity_map.get(severity, 0)
|
| 197 |
+
|
| 198 |
+
def generate_statistics(self, analyses: Dict[str, FileAnalysis]) -> Dict:
|
| 199 |
+
"""
|
| 200 |
+
Generate statistics from pattern analyses.
|
| 201 |
+
|
| 202 |
+
Args:
|
| 203 |
+
analyses: Dictionary of file analyses
|
| 204 |
+
|
| 205 |
+
Returns:
|
| 206 |
+
Statistics dictionary
|
| 207 |
+
"""
|
| 208 |
+
total_files = len(analyses)
|
| 209 |
+
|
| 210 |
+
# Count by category
|
| 211 |
+
modernize_high = sum(
|
| 212 |
+
1 for a in analyses.values()
|
| 213 |
+
if self._analysis_to_category(a) == 'modernize_high'
|
| 214 |
+
)
|
| 215 |
+
modernize_low = sum(
|
| 216 |
+
1 for a in analyses.values()
|
| 217 |
+
if self._analysis_to_category(a) == 'modernize_low'
|
| 218 |
+
)
|
| 219 |
+
skip = total_files - modernize_high - modernize_low
|
| 220 |
+
|
| 221 |
+
# Count patterns by severity
|
| 222 |
+
severity_counts = {s.value: 0 for s in PatternSeverity}
|
| 223 |
+
for analysis in analyses.values():
|
| 224 |
+
for pattern in analysis.patterns:
|
| 225 |
+
severity_counts[pattern.severity.value] += 1
|
| 226 |
+
|
| 227 |
+
# Calculate average scores
|
| 228 |
+
avg_modernization_score = (
|
| 229 |
+
sum(a.modernization_score for a in analyses.values()) / max(total_files, 1)
|
| 230 |
+
)
|
| 231 |
+
|
| 232 |
+
# Estimate total effort
|
| 233 |
+
total_effort = sum(
|
| 234 |
+
sum(p.estimated_effort_hours for p in a.patterns)
|
| 235 |
+
for a in analyses.values()
|
| 236 |
+
)
|
| 237 |
+
|
| 238 |
+
return {
|
| 239 |
+
'total_files': total_files,
|
| 240 |
+
'modernize_high': modernize_high,
|
| 241 |
+
'modernize_low': modernize_low,
|
| 242 |
+
'skip': skip,
|
| 243 |
+
'severity_counts': severity_counts,
|
| 244 |
+
'average_modernization_score': round(avg_modernization_score, 2),
|
| 245 |
+
'total_estimated_effort_hours': round(total_effort, 2),
|
| 246 |
+
'patterns_detected': sum(len(a.patterns) for a in analyses.values())
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
def migrate_to_intelligent_matcher(
|
| 251 |
+
orchestrator,
|
| 252 |
+
repo_path: str,
|
| 253 |
+
file_contents: Dict[str, str]
|
| 254 |
+
) -> Dict:
|
| 255 |
+
"""
|
| 256 |
+
Helper function to migrate existing orchestrator to use intelligent matcher.
|
| 257 |
+
|
| 258 |
+
Args:
|
| 259 |
+
orchestrator: ModernizationOrchestrator instance
|
| 260 |
+
repo_path: Path to repository
|
| 261 |
+
file_contents: Dictionary of file contents
|
| 262 |
+
|
| 263 |
+
Returns:
|
| 264 |
+
Enhanced results with detailed pattern analysis
|
| 265 |
+
"""
|
| 266 |
+
logger.info("Migrating to IntelligentPatternMatcher")
|
| 267 |
+
|
| 268 |
+
# Create integration layer
|
| 269 |
+
integration = PatternMatcherIntegration(
|
| 270 |
+
use_intelligent_matcher=True,
|
| 271 |
+
cache_dir=Path(repo_path) / ".pattern_cache"
|
| 272 |
+
)
|
| 273 |
+
|
| 274 |
+
# Analyze all files
|
| 275 |
+
analyses = integration.pattern_matcher.analyze_batch(file_contents)
|
| 276 |
+
|
| 277 |
+
# Generate prioritized list
|
| 278 |
+
prioritized = integration.pattern_matcher.prioritize_files(analyses)
|
| 279 |
+
|
| 280 |
+
# Convert to transformation plans
|
| 281 |
+
transformation_plans = {}
|
| 282 |
+
for file_path, analysis in prioritized:
|
| 283 |
+
if analysis.requires_modernization:
|
| 284 |
+
plan = integration.get_transformation_plan(analysis)
|
| 285 |
+
transformation_plans[file_path] = plan
|
| 286 |
+
|
| 287 |
+
# Generate report
|
| 288 |
+
report = integration.pattern_matcher.generate_report(analyses)
|
| 289 |
+
|
| 290 |
+
return {
|
| 291 |
+
'analyses': analyses,
|
| 292 |
+
'prioritized_files': prioritized,
|
| 293 |
+
'transformation_plans': transformation_plans,
|
| 294 |
+
'statistics': integration.generate_statistics(analyses),
|
| 295 |
+
'report': report
|
| 296 |
+
}
|
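A short sketch of driving the integration layer above end-to-end. The file contents are placeholders, and a configured AI provider key (per .env) is assumed, since classification ultimately calls the model.

# Hypothetical usage of PatternMatcherIntegration; contents are placeholders.
from src.agents.pattern_integration import PatternMatcherIntegration

contents = {
    "legacy/db.py": "import MySQLdb\n# ...",
    "app/main.py": "print('hello')\n",
}

integration = PatternMatcherIntegration(use_intelligent_matcher=True, cache_dir=".pattern_cache")
categories = integration.classify_files(list(contents), file_contents=contents)
# e.g. {'legacy/db.py': 'modernize_high', 'app/main.py': 'skip'}, depending on the model's findings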
src/agents/pattern_matcher.py
ADDED
@@ -0,0 +1,838 @@
"""
|
| 2 |
+
Production-grade pattern matching system with AI-powered file type detection.
|
| 3 |
+
Replaces the simple primary/secondary classification with intelligent pattern detection.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import logging
|
| 8 |
+
from typing import Dict, List, Optional, Tuple
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
import json
|
| 11 |
+
from dataclasses import dataclass
|
| 12 |
+
from enum import Enum
|
| 13 |
+
|
| 14 |
+
from src.config import AIManager, GeminiSchemas
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class PatternSeverity(Enum):
|
| 20 |
+
"""Severity levels for detected patterns."""
|
| 21 |
+
CRITICAL = "critical" # Security issues, breaking changes
|
| 22 |
+
HIGH = "high" # Deprecated APIs, performance issues
|
| 23 |
+
MEDIUM = "medium" # Code quality, maintainability
|
| 24 |
+
LOW = "low" # Style, minor improvements
|
| 25 |
+
INFO = "info" # Informational only
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@dataclass
|
| 29 |
+
class DetectedPattern:
|
| 30 |
+
"""Represents a detected legacy pattern."""
|
| 31 |
+
pattern_type: str
|
| 32 |
+
severity: PatternSeverity
|
| 33 |
+
file_path: str
|
| 34 |
+
language: str
|
| 35 |
+
description: str
|
| 36 |
+
line_numbers: List[int]
|
| 37 |
+
confidence: float # 0.0 to 1.0
|
| 38 |
+
recommendation: str
|
| 39 |
+
estimated_effort_hours: float
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
@dataclass
|
| 43 |
+
class FileAnalysis:
|
| 44 |
+
"""Complete analysis of a single file."""
|
| 45 |
+
file_path: str
|
| 46 |
+
language: str
|
| 47 |
+
framework: Optional[str]
|
| 48 |
+
patterns: List[DetectedPattern]
|
| 49 |
+
overall_priority: PatternSeverity
|
| 50 |
+
modernization_score: float # 0-100, higher = more modern
|
| 51 |
+
requires_modernization: bool
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class IntelligentPatternMatcher:
|
| 55 |
+
"""
|
| 56 |
+
Production-grade pattern matcher using AI for intelligent detection.
|
| 57 |
+
|
| 58 |
+
Features:
|
| 59 |
+
- Language-agnostic pattern detection
|
| 60 |
+
- Context-aware analysis
|
| 61 |
+
- Confidence scoring
|
| 62 |
+
- Batch processing optimization
|
| 63 |
+
- Caching for performance
|
| 64 |
+
"""
|
| 65 |
+
|
| 66 |
+
# Language detection patterns
|
| 67 |
+
LANGUAGE_PATTERNS = {
|
| 68 |
+
# Python
|
| 69 |
+
'.py': 'Python',
|
| 70 |
+
'.pyw': 'Python',
|
| 71 |
+
'.pyx': 'Python (Cython)',
|
| 72 |
+
# Java
|
| 73 |
+
'.java': 'Java',
|
| 74 |
+
# JavaScript/TypeScript
|
| 75 |
+
'.js': 'JavaScript',
|
| 76 |
+
'.jsx': 'JavaScript (React)',
|
| 77 |
+
'.mjs': 'JavaScript (ES Module)',
|
| 78 |
+
'.cjs': 'JavaScript (CommonJS)',
|
| 79 |
+
'.ts': 'TypeScript',
|
| 80 |
+
'.tsx': 'TypeScript (React)',
|
| 81 |
+
# PHP
|
| 82 |
+
'.php': 'PHP',
|
| 83 |
+
'.php3': 'PHP',
|
| 84 |
+
'.php4': 'PHP',
|
| 85 |
+
'.php5': 'PHP',
|
| 86 |
+
'.phtml': 'PHP',
|
| 87 |
+
# Ruby
|
| 88 |
+
'.rb': 'Ruby',
|
| 89 |
+
'.rbw': 'Ruby',
|
| 90 |
+
# Go
|
| 91 |
+
'.go': 'Go',
|
| 92 |
+
# C/C++
|
| 93 |
+
'.c': 'C',
|
| 94 |
+
'.h': 'C/C++ Header',
|
| 95 |
+
'.cpp': 'C++',
|
| 96 |
+
'.cc': 'C++',
|
| 97 |
+
'.cxx': 'C++',
|
| 98 |
+
'.c++': 'C++',
|
| 99 |
+
'.hpp': 'C++ Header',
|
| 100 |
+
'.hh': 'C++ Header',
|
| 101 |
+
'.hxx': 'C++ Header',
|
| 102 |
+
'.h++': 'C++ Header',
|
| 103 |
+
# C#
|
| 104 |
+
'.cs': 'C#',
|
| 105 |
+
# Rust
|
| 106 |
+
'.rs': 'Rust',
|
| 107 |
+
# Kotlin
|
| 108 |
+
'.kt': 'Kotlin',
|
| 109 |
+
'.kts': 'Kotlin Script',
|
| 110 |
+
# Swift
|
| 111 |
+
'.swift': 'Swift',
|
| 112 |
+
# Scala
|
| 113 |
+
'.scala': 'Scala',
|
| 114 |
+
'.sc': 'Scala Script',
|
| 115 |
+
# R
|
| 116 |
+
'.r': 'R',
|
| 117 |
+
'.R': 'R',
|
| 118 |
+
# Perl
|
| 119 |
+
'.pl': 'Perl',
|
| 120 |
+
'.pm': 'Perl Module',
|
| 121 |
+
'.t': 'Perl Test',
|
| 122 |
+
'.pod': 'Perl Documentation',
|
| 123 |
+
# Shell
|
| 124 |
+
'.sh': 'Shell',
|
| 125 |
+
'.bash': 'Bash',
|
| 126 |
+
'.zsh': 'Zsh',
|
| 127 |
+
'.fish': 'Fish Shell'
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
# Common legacy patterns by language
|
| 131 |
+
LEGACY_PATTERNS = {
|
| 132 |
+
'Python': [
|
| 133 |
+
'Python 2 syntax (print statements, old-style classes)',
|
| 134 |
+
'Deprecated libraries (MySQLdb, urllib2, optparse)',
|
| 135 |
+
'Missing type hints',
|
| 136 |
+
'Hardcoded credentials',
|
| 137 |
+
'SQL injection vulnerabilities',
|
| 138 |
+
'Insecure cryptography (MD5, SHA1 for passwords)',
|
| 139 |
+
'Global variables and mutable defaults',
|
| 140 |
+
'Missing error handling',
|
| 141 |
+
'Synchronous I/O in async contexts'
|
| 142 |
+
],
|
| 143 |
+
'Java': [
|
| 144 |
+
'Pre-Java 8 code (no lambdas, streams)',
|
| 145 |
+
'Deprecated APIs (Vector, Hashtable, Date)',
|
| 146 |
+
'Missing generics',
|
| 147 |
+
'Raw JDBC without ORM',
|
| 148 |
+
'Synchronization issues',
|
| 149 |
+
'Resource leaks (missing try-with-resources)',
|
| 150 |
+
'Hardcoded configuration',
|
| 151 |
+
'Missing null checks'
|
| 152 |
+
],
|
| 153 |
+
'JavaScript': [
|
| 154 |
+
'var instead of let/const',
|
| 155 |
+
'Callback hell (no Promises/async-await)',
|
| 156 |
+
'jQuery for DOM manipulation',
|
| 157 |
+
'eval() usage',
|
| 158 |
+
'Missing strict mode',
|
| 159 |
+
'Prototype-based inheritance',
|
| 160 |
+
'Global namespace pollution',
|
| 161 |
+
'XSS vulnerabilities'
|
| 162 |
+
],
|
| 163 |
+
'TypeScript': [
|
| 164 |
+
'any type overuse',
|
| 165 |
+
'Missing strict mode',
|
| 166 |
+
'Old module syntax',
|
| 167 |
+
'Missing null checks',
|
| 168 |
+
'Implicit any',
|
| 169 |
+
'Type assertions instead of guards'
|
| 170 |
+
],
|
| 171 |
+
'PHP': [
|
| 172 |
+
'mysql_* functions (deprecated)',
|
| 173 |
+
'No prepared statements',
|
| 174 |
+
'register_globals usage',
|
| 175 |
+
'eval() and create_function()',
|
| 176 |
+
'Missing input validation',
|
| 177 |
+
'Outdated PHP version syntax',
|
| 178 |
+
'No namespace usage',
|
| 179 |
+
'Missing error handling'
|
| 180 |
+
],
|
| 181 |
+
'Ruby': [
|
| 182 |
+
'Ruby 1.8/1.9 syntax',
|
| 183 |
+
'Missing bundler',
|
| 184 |
+
'Deprecated gem versions',
|
| 185 |
+
'Missing RSpec/Minitest',
|
| 186 |
+
'Global variables',
|
| 187 |
+
'Missing error handling',
|
| 188 |
+
'Synchronous I/O'
|
| 189 |
+
],
|
| 190 |
+
'Go': [
|
| 191 |
+
'Missing error handling',
|
| 192 |
+
'Deprecated packages',
|
| 193 |
+
'No context usage',
|
| 194 |
+
'Missing defer for cleanup',
|
| 195 |
+
'Goroutine leaks',
|
| 196 |
+
'Race conditions'
|
| 197 |
+
],
|
| 198 |
+
'C++': [
|
| 199 |
+
'Raw pointers instead of smart pointers',
|
| 200 |
+
'Manual memory management',
|
| 201 |
+
'Missing RAII',
|
| 202 |
+
'C-style casts',
|
| 203 |
+
'Missing const correctness',
|
| 204 |
+
'No move semantics',
|
| 205 |
+
'Deprecated C++98/03 features'
|
| 206 |
+
],
|
| 207 |
+
'C#': [
|
| 208 |
+
'Missing async/await patterns',
|
| 209 |
+
'Old collection types',
|
| 210 |
+
'Missing LINQ usage',
|
| 211 |
+
'Deprecated .NET Framework APIs',
|
| 212 |
+
'Missing nullable reference types',
|
| 213 |
+
'Old string concatenation',
|
| 214 |
+
'Missing using statements'
|
| 215 |
+
],
|
| 216 |
+
'Rust': [
|
| 217 |
+
'Deprecated Rust 2015/2018 syntax',
|
| 218 |
+
'Missing error handling with Result',
|
| 219 |
+
'Unsafe code blocks',
|
| 220 |
+
'Missing lifetime annotations',
|
| 221 |
+
'Deprecated crate versions',
|
| 222 |
+
'Missing async/await'
|
| 223 |
+
],
|
| 224 |
+
'Kotlin': [
|
| 225 |
+
'Java-style code in Kotlin',
|
| 226 |
+
'Missing null safety',
|
| 227 |
+
'Not using coroutines',
|
| 228 |
+
'Missing data classes',
|
| 229 |
+
'Old collection APIs',
|
| 230 |
+
'Missing extension functions'
|
| 231 |
+
],
|
| 232 |
+
'Swift': [
|
| 233 |
+
'Objective-C style code',
|
| 234 |
+
'Missing optionals',
|
| 235 |
+
'Old closure syntax',
|
| 236 |
+
'Missing guard statements',
|
| 237 |
+
'Deprecated Swift 4 features',
|
| 238 |
+
'Missing Codable protocol'
|
| 239 |
+
],
|
| 240 |
+
'Scala': [
|
| 241 |
+
'Scala 2.x syntax',
|
| 242 |
+
'Missing for-comprehensions',
|
| 243 |
+
'Old collection APIs',
|
| 244 |
+
'Missing implicit conversions',
|
| 245 |
+
'Deprecated Future usage',
|
| 246 |
+
'Missing case classes'
|
| 247 |
+
],
|
| 248 |
+
'R': [
|
| 249 |
+
'Old R syntax',
|
| 250 |
+
'Missing tidyverse usage',
|
| 251 |
+
'Deprecated package versions',
|
| 252 |
+
'Missing pipe operators',
|
| 253 |
+
'Old data.frame usage',
|
| 254 |
+
'Missing ggplot2'
|
| 255 |
+
],
|
| 256 |
+
'Perl': [
|
| 257 |
+
'Perl 4 syntax',
|
| 258 |
+
'Missing strict and warnings',
|
| 259 |
+
'Old module system',
|
| 260 |
+
'Deprecated CPAN modules',
|
| 261 |
+
'Missing Moose/Moo',
|
| 262 |
+
'Old regex syntax'
|
| 263 |
+
],
|
| 264 |
+
'Shell': [
|
| 265 |
+
'Missing error handling (set -e)',
|
| 266 |
+
'Unquoted variables',
|
| 267 |
+
'Missing shellcheck compliance',
|
| 268 |
+
'Deprecated commands',
|
| 269 |
+
'Missing function usage',
|
| 270 |
+
'Security vulnerabilities'
|
| 271 |
+
]
|
| 272 |
+
}
|
| 273 |
+
|
| 274 |
+
def __init__(self, cache_dir: Optional[str] = None):
|
| 275 |
+
"""
|
| 276 |
+
Initialize pattern matcher.
|
| 277 |
+
|
| 278 |
+
Args:
|
| 279 |
+
cache_dir: Optional directory for caching analysis results
|
| 280 |
+
"""
|
| 281 |
+
# Use centralized AI manager
|
| 282 |
+
self.ai_manager = AIManager()
|
| 283 |
+
self.cache_dir = Path(cache_dir) if cache_dir else None
|
| 284 |
+
|
| 285 |
+
if self.cache_dir:
|
| 286 |
+
self.cache_dir.mkdir(exist_ok=True, parents=True)
|
| 287 |
+
|
| 288 |
+
logger.info(
|
| 289 |
+
f"IntelligentPatternMatcher initialized with provider: {self.ai_manager.provider_name}, "
|
| 290 |
+
f"model: {self.ai_manager.model_name}"
|
| 291 |
+
)
|
| 292 |
+
|
| 293 |
+
def detect_language(self, file_path: str, code_sample: str) -> Tuple[str, Optional[str]]:
|
| 294 |
+
"""
|
| 295 |
+
Detect programming language and framework using AI.
|
| 296 |
+
|
| 297 |
+
Args:
|
| 298 |
+
file_path: Path to the file
|
| 299 |
+
code_sample: Sample of code (first 500 chars)
|
| 300 |
+
|
| 301 |
+
Returns:
|
| 302 |
+
Tuple of (language, framework)
|
| 303 |
+
"""
|
| 304 |
+
# First try extension-based detection
|
| 305 |
+
ext = Path(file_path).suffix.lower()
|
| 306 |
+
base_language = self.LANGUAGE_PATTERNS.get(ext, 'Unknown')
|
| 307 |
+
|
| 308 |
+
# Use AI for framework detection
|
| 309 |
+
prompt = f"""Analyze this code and identify:
|
| 310 |
+
1. Programming language (confirm or correct: {base_language})
|
| 311 |
+
2. Framework/library being used (if any)
|
| 312 |
+
|
| 313 |
+
FILE: {file_path}
|
| 314 |
+
CODE SAMPLE:
|
| 315 |
+
```
|
| 316 |
+
{code_sample[:500]}
|
| 317 |
+
```
|
| 318 |
+
|
| 319 |
+
Respond in JSON format:
|
| 320 |
+
{{
|
| 321 |
+
"language": "detected language",
|
| 322 |
+
"framework": "framework name or null",
|
| 323 |
+
"confidence": 0.0-1.0
|
| 324 |
+
}}
|
| 325 |
+
"""
|
| 326 |
+
|
| 327 |
+
try:
|
| 328 |
+
# Use JSON schema for guaranteed structure
|
| 329 |
+
schema = GeminiSchemas.language_detection()
|
| 330 |
+
|
| 331 |
+
response_text = self.ai_manager.generate_content(
|
| 332 |
+
prompt=prompt,
|
| 333 |
+
temperature=AIManager.TEMPERATURE_PRECISE,
|
| 334 |
+
max_tokens=AIManager.MAX_OUTPUT_TOKENS_SMALL,
|
| 335 |
+
response_format="json",
|
| 336 |
+
response_schema=schema if self.ai_manager.provider_type == "gemini" else None
|
| 337 |
+
)
|
| 338 |
+
|
| 339 |
+
result = json.loads(response_text)
|
| 340 |
+
language = result.get('language', base_language)
|
| 341 |
+
framework = result.get('framework')
|
| 342 |
+
|
| 343 |
+
logger.info(f"Language detection: {language}, Framework: {framework}, Confidence: {result.get('confidence', 0)}")
|
| 344 |
+
return language, framework
|
| 345 |
+
|
| 346 |
+
except Exception as e:
|
| 347 |
+
logger.warning(f"AI language detection failed: {e}, using extension-based")
|
| 348 |
+
return base_language, None
|
| 349 |
+
|
| 350 |
+
def analyze_file(self, file_path: str, code: str) -> FileAnalysis:
|
| 351 |
+
"""
|
| 352 |
+
Perform comprehensive pattern analysis on a single file.
|
| 353 |
+
|
| 354 |
+
Args:
|
| 355 |
+
file_path: Path to the file
|
| 356 |
+
code: File contents
|
| 357 |
+
|
| 358 |
+
Returns:
|
| 359 |
+
FileAnalysis object with detected patterns
|
| 360 |
+
"""
|
| 361 |
+
logger.info(f"Analyzing patterns in {file_path}")
|
| 362 |
+
|
| 363 |
+
# Check cache
|
| 364 |
+
if self.cache_dir:
|
| 365 |
+
cache_file = self.cache_dir / f"{hash(file_path + code)}.json"
|
| 366 |
+
if cache_file.exists():
|
| 367 |
+
try:
|
| 368 |
+
cached = json.loads(cache_file.read_text())
|
| 369 |
+
return self._deserialize_analysis(cached)
|
| 370 |
+
except Exception as e:
|
| 371 |
+
logger.warning(f"Cache read failed: {e}")
|
| 372 |
+
|
| 373 |
+
# Detect language and framework
|
| 374 |
+
language, framework = self.detect_language(file_path, code[:500])
|
| 375 |
+
|
| 376 |
+
# Get relevant patterns for this language
|
| 377 |
+
relevant_patterns = self.LEGACY_PATTERNS.get(language, [])
|
| 378 |
+
|
| 379 |
+
# Build analysis prompt - limit code size to prevent output token overflow
|
| 380 |
+
# For large files, we need to be more conservative to leave room for detailed analysis
|
| 381 |
+
code_limit = 4000 if len(code) > 6000 else 6000
|
| 382 |
+
|
| 383 |
+
prompt = f"""You are a senior code auditor. Analyze this code for legacy patterns and modernization opportunities.
|
| 384 |
+
|
| 385 |
+
FILE: {file_path}
|
| 386 |
+
LANGUAGE: {language}
|
| 387 |
+
FRAMEWORK: {framework or 'None detected'}
|
| 388 |
+
|
| 389 |
+
PATTERNS TO CHECK:
|
| 390 |
+
{json.dumps(relevant_patterns, indent=2)}
|
| 391 |
+
|
| 392 |
+
CODE:
|
| 393 |
+
```{language.lower()}
|
| 394 |
+
{code[:code_limit]}
|
| 395 |
+
```
|
| 396 |
+
|
| 397 |
+
IMPORTANT: Focus on the MOST CRITICAL patterns. Limit your response to the top 10 most important issues.
|
| 398 |
+
|
| 399 |
+
For each detected pattern, provide:
|
| 400 |
+
1. Pattern type (from the list above or new if discovered)
|
| 401 |
+
2. Severity (critical/high/medium/low/info)
|
| 402 |
+
3. Line numbers where pattern appears (first occurrence only)
|
| 403 |
+
4. Confidence score (0.0-1.0)
|
| 404 |
+
5. Brief description (max 100 chars)
|
| 405 |
+
6. Concise recommendation (max 100 chars)
|
| 406 |
+
7. Estimated effort in hours
|
| 407 |
+
|
| 408 |
+
Also provide:
|
| 409 |
+
- Overall modernization score (0-100, where 100 is fully modern)
|
| 410 |
+
- Whether modernization is required (true/false)
|
| 411 |
+
- Overall priority (critical/high/medium/low/info)
|
| 412 |
+
|
| 413 |
+
Respond in JSON format:
|
| 414 |
+
{{
|
| 415 |
+
"patterns": [
|
| 416 |
+
{{
|
| 417 |
+
"pattern_type": "string",
|
| 418 |
+
"severity": "critical|high|medium|low|info",
|
| 419 |
+
"line_numbers": [1],
|
| 420 |
+
"confidence": 0.95,
|
| 421 |
+
"description": "brief description",
|
| 422 |
+
"recommendation": "concise fix",
|
| 423 |
+
"estimated_effort_hours": 2.5
|
| 424 |
+
}}
|
| 425 |
+
],
|
| 426 |
+
"modernization_score": 65,
|
| 427 |
+
"requires_modernization": true,
|
| 428 |
+
"overall_priority": "high"
|
| 429 |
+
}}
|
| 430 |
+
"""
|
| 431 |
+
|
| 432 |
+
try:
|
| 433 |
+
# Use JSON schema for guaranteed structure - no more parsing failures!
|
| 434 |
+
# Use LARGE token limit for detailed pattern analysis
|
| 435 |
+
schema = GeminiSchemas.pattern_analysis()
|
| 436 |
+
|
| 437 |
+
response_text = self.ai_manager.generate_content(
|
| 438 |
+
prompt=prompt,
|
| 439 |
+
temperature=AIManager.TEMPERATURE_PRECISE,
|
| 440 |
+
max_tokens=AIManager.MAX_OUTPUT_TOKENS_LARGE,
|
| 441 |
+
response_format="json",
|
| 442 |
+
response_schema=schema if self.ai_manager.provider_type == "gemini" else None
|
| 443 |
+
)
|
| 444 |
+
|
| 445 |
+
if not response_text:
|
| 446 |
+
logger.error(f"Empty response from AI for {file_path}")
|
| 447 |
+
raise ValueError(f"Empty response from AI API for {file_path}")
|
| 448 |
+
|
| 449 |
+
# With JSON schema, response is guaranteed to be valid JSON
|
| 450 |
+
result = json.loads(response_text)
|
| 451 |
+
logger.info(f"Pattern analysis successful for {file_path}: {len(result.get('patterns', []))} patterns found")
|
| 452 |
+
|
| 453 |
+
# Convert to DetectedPattern objects
|
| 454 |
+
patterns = []
|
| 455 |
+
for p in result.get('patterns', []):
|
| 456 |
+
patterns.append(DetectedPattern(
|
| 457 |
+
pattern_type=p['pattern_type'],
|
| 458 |
+
severity=PatternSeverity(p['severity']),
|
| 459 |
+
file_path=file_path,
|
| 460 |
+
language=language,
|
| 461 |
+
description=p['description'],
|
| 462 |
+
line_numbers=p.get('line_numbers', []),
|
| 463 |
+
confidence=p.get('confidence', 0.8),
|
| 464 |
+
recommendation=p['recommendation'],
|
| 465 |
+
estimated_effort_hours=p.get('estimated_effort_hours', 1.0)
|
| 466 |
+
))
|
| 467 |
+
|
| 468 |
+
analysis = FileAnalysis(
|
| 469 |
+
file_path=file_path,
|
| 470 |
+
language=language,
|
| 471 |
+
framework=framework,
|
| 472 |
+
patterns=patterns,
|
| 473 |
+
overall_priority=PatternSeverity(result.get('overall_priority', 'medium')),
|
| 474 |
+
modernization_score=result.get('modernization_score', 50),
|
| 475 |
+
requires_modernization=result.get('requires_modernization', True)
|
| 476 |
+
)
|
| 477 |
+
|
| 478 |
+
# Cache the result
|
| 479 |
+
if self.cache_dir:
|
| 480 |
+
try:
|
| 481 |
+
cache_file = self.cache_dir / f"{hash(file_path + code)}.json"
|
| 482 |
+
cache_file.write_text(json.dumps(self._serialize_analysis(analysis), indent=2))
|
| 483 |
+
except Exception as e:
|
| 484 |
+
logger.warning(f"Cache write failed: {e}")
|
| 485 |
+
|
| 486 |
+
logger.info(f"Found {len(patterns)} patterns in {file_path}")
|
| 487 |
+
return analysis
|
| 488 |
+
|
| 489 |
+
except Exception as e:
|
| 490 |
+
logger.error(f"Pattern analysis failed for {file_path}: {e}")
|
| 491 |
+
# Return minimal analysis on error
|
| 492 |
+
return FileAnalysis(
|
| 493 |
+
file_path=file_path,
|
| 494 |
+
language=language,
|
| 495 |
+
framework=framework,
|
| 496 |
+
patterns=[],
|
| 497 |
+
overall_priority=PatternSeverity.INFO,
|
| 498 |
+
modernization_score=100,
|
| 499 |
+
requires_modernization=False
|
| 500 |
+
)
|
| 501 |
+
|
| 502 |
+
def analyze_batch(self, files: Dict[str, str], batch_size: int = 3) -> Dict[str, FileAnalysis]:
|
| 503 |
+
"""
|
| 504 |
+
Analyze multiple files efficiently by batching API calls.
|
| 505 |
+
|
| 506 |
+
Args:
|
| 507 |
+
files: Dictionary mapping file paths to contents
|
| 508 |
+
batch_size: Number of files to analyze per API call (default: 3)
|
| 509 |
+
|
| 510 |
+
Returns:
|
| 511 |
+
Dictionary mapping file paths to FileAnalysis objects
|
| 512 |
+
"""
|
| 513 |
+
logger.info(f"Batch analyzing {len(files)} files with batch_size={batch_size}")
|
| 514 |
+
|
| 515 |
+
results = {}
|
| 516 |
+
file_items = list(files.items())
|
| 517 |
+
|
| 518 |
+
# Process in batches to reduce API calls
|
| 519 |
+
for i in range(0, len(file_items), batch_size):
|
| 520 |
+
batch = file_items[i:i + batch_size]
|
| 521 |
+
|
| 522 |
+
if len(batch) == 1:
|
| 523 |
+
# Single file - use individual analysis
|
| 524 |
+
file_path, code = batch[0]
|
| 525 |
+
try:
|
| 526 |
+
analysis = self.analyze_file(file_path, code)
|
| 527 |
+
results[file_path] = analysis
|
| 528 |
+
except Exception as e:
|
| 529 |
+
logger.error(f"Failed to analyze {file_path}: {e}")
|
| 530 |
+
else:
|
| 531 |
+
# Multiple files - use batch analysis
|
| 532 |
+
try:
|
| 533 |
+
batch_results = self._analyze_batch_api(batch)
|
| 534 |
+
results.update(batch_results)
|
| 535 |
+
except Exception as e:
|
| 536 |
+
logger.error(f"Batch analysis failed: {e}, falling back to individual")
|
| 537 |
+
# Fallback to individual analysis
|
| 538 |
+
for file_path, code in batch:
|
| 539 |
+
try:
|
| 540 |
+
analysis = self.analyze_file(file_path, code)
|
| 541 |
+
results[file_path] = analysis
|
| 542 |
+
except Exception as e2:
|
| 543 |
+
logger.error(f"Failed to analyze {file_path}: {e2}")
|
| 544 |
+
|
| 545 |
+
logger.info(f"Batch analysis complete: {len(results)} files analyzed")
|
| 546 |
+
return results
|
| 547 |
+
|
| 548 |
+
def _analyze_batch_api(self, batch: List[Tuple[str, str]]) -> Dict[str, FileAnalysis]:
|
| 549 |
+
"""
|
| 550 |
+
Analyze multiple files in a single API call.
|
| 551 |
+
|
| 552 |
+
Args:
|
| 553 |
+
batch: List of (file_path, code) tuples
|
| 554 |
+
|
| 555 |
+
Returns:
|
| 556 |
+
Dictionary mapping file paths to FileAnalysis objects
|
| 557 |
+
"""
|
| 558 |
+
logger.info(f"Analyzing {len(batch)} files in single API call")
|
| 559 |
+
|
| 560 |
+
# Build combined prompt for all files
|
| 561 |
+
# Reduce code sample size for batch processing to prevent token overflow
|
| 562 |
+
files_info = []
|
| 563 |
+
for file_path, code in batch:
|
| 564 |
+
ext = Path(file_path).suffix.lower()
|
| 565 |
+
language = self.LANGUAGE_PATTERNS.get(ext, 'Unknown')
|
| 566 |
+
|
| 567 |
+
# Use smaller samples for batch to leave room for multiple file analyses
|
| 568 |
+
code_sample_size = 2000 if len(batch) > 2 else 3000
|
| 569 |
+
|
| 570 |
+
files_info.append({
|
| 571 |
+
'file_path': file_path,
|
| 572 |
+
'language': language,
|
| 573 |
+
'code_sample': code[:code_sample_size]
|
| 574 |
+
})
|
| 575 |
+
|
| 576 |
+
prompt = f"""Analyze these {len(batch)} code files for legacy patterns and modernization opportunities.
|
| 577 |
+
|
| 578 |
+
For EACH file, provide a complete analysis with patterns, scores, and priorities.
|
| 579 |
+
IMPORTANT: Limit to top 5 most critical patterns per file to keep response concise.
|
| 580 |
+
|
| 581 |
+
FILES TO ANALYZE:
|
| 582 |
+
{json.dumps(files_info, indent=2)}
|
| 583 |
+
|
| 584 |
+
For each file, detect:
|
| 585 |
+
- Deprecated libraries and APIs
|
| 586 |
+
- Security vulnerabilities (SQL injection, XSS, hardcoded credentials)
|
| 587 |
+
- Code quality issues (missing type hints, error handling)
|
| 588 |
+
- Performance problems
|
| 589 |
+
|
| 590 |
+
Keep descriptions and recommendations brief (max 80 chars each).
|
| 591 |
+
|
| 592 |
+
Respond in JSON format with this structure:
|
| 593 |
+
{{
|
| 594 |
+
"files": [
|
| 595 |
+
{{
|
| 596 |
+
"file_path": "file1.py",
|
| 597 |
+
"language": "Python",
|
| 598 |
+
"framework": "Flask or null",
|
| 599 |
+
"patterns": [
|
| 600 |
+
{{
|
| 601 |
+
"pattern_type": "SQL injection vulnerability",
|
| 602 |
+
"severity": "critical",
|
| 603 |
+
"line_numbers": [10, 11],
|
| 604 |
+
"confidence": 0.95,
|
| 605 |
+
"description": "Direct string concatenation in SQL query",
|
| 606 |
+
"recommendation": "Use parameterized queries",
|
| 607 |
+
"estimated_effort_hours": 2.0
|
| 608 |
+
}}
|
| 609 |
+
],
|
| 610 |
+
"modernization_score": 35,
|
| 611 |
+
"requires_modernization": true,
|
| 612 |
+
"overall_priority": "critical"
|
| 613 |
+
}}
|
| 614 |
+
]
|
| 615 |
+
}}
|
| 616 |
+
"""
|
| 617 |
+
|
| 618 |
+
try:
|
| 619 |
+
# Use JSON schema for guaranteed structure
|
| 620 |
+
schema = GeminiSchemas.batch_pattern_analysis()
|
| 621 |
+
|
| 622 |
+
response_text = self.ai_manager.generate_content(
|
| 623 |
+
prompt=prompt,
|
| 624 |
+
temperature=AIManager.TEMPERATURE_PRECISE,
|
| 625 |
+
max_tokens=AIManager.MAX_OUTPUT_TOKENS_LARGE,
|
| 626 |
+
response_format="json",
|
| 627 |
+
response_schema=schema if self.ai_manager.provider_type == "gemini" else None
|
| 628 |
+
)
|
| 629 |
+
|
| 630 |
+
# With JSON schema, response is guaranteed to be valid JSON
|
| 631 |
+
result = json.loads(response_text)
|
| 632 |
+
logger.info(f"Batch analysis successful: received data for {len(result.get('files', []))} files")
|
| 633 |
+
|
| 634 |
+
# Schema guarantees 'files' key exists
|
| 635 |
+
files_data = result.get('files', [])
|
| 636 |
+
|
| 637 |
+
# Convert to FileAnalysis objects
|
| 638 |
+
analyses = {}
|
| 639 |
+
for file_data in files_data:
|
| 640 |
+
file_path = file_data['file_path']
|
| 641 |
+
language = file_data.get('language', 'Unknown')
|
| 642 |
+
framework = file_data.get('framework')
|
| 643 |
+
|
| 644 |
+
patterns = []
|
| 645 |
+
for p in file_data.get('patterns', []):
|
| 646 |
+
patterns.append(DetectedPattern(
|
| 647 |
+
pattern_type=p['pattern_type'],
|
| 648 |
+
severity=PatternSeverity(p['severity']),
|
| 649 |
+
file_path=file_path,
|
| 650 |
+
language=language,
|
| 651 |
+
description=p['description'],
|
| 652 |
+
line_numbers=p.get('line_numbers', []),
|
| 653 |
+
confidence=p.get('confidence', 0.8),
|
| 654 |
+
recommendation=p['recommendation'],
|
| 655 |
+
estimated_effort_hours=p.get('estimated_effort_hours', 1.0)
|
| 656 |
+
))
|
| 657 |
+
|
| 658 |
+
analysis = FileAnalysis(
|
| 659 |
+
file_path=file_path,
|
| 660 |
+
language=language,
|
| 661 |
+
framework=framework,
|
| 662 |
+
patterns=patterns,
|
| 663 |
+
overall_priority=PatternSeverity(file_data.get('overall_priority', 'medium')),
|
| 664 |
+
modernization_score=file_data.get('modernization_score', 50),
|
| 665 |
+
requires_modernization=file_data.get('requires_modernization', True)
|
| 666 |
+
)
|
| 667 |
+
|
| 668 |
+
analyses[file_path] = analysis
|
| 669 |
+
|
| 670 |
+
logger.info(f"Batch API call successful: analyzed {len(analyses)} files")
|
| 671 |
+
return analyses
|
| 672 |
+
|
| 673 |
+
except Exception as e:
|
| 674 |
+
logger.error(f"Batch API call failed: {e}")
|
| 675 |
+
raise
|
| 676 |
+
|
| 677 |
+
def prioritize_files(self, analyses: Dict[str, FileAnalysis]) -> List[Tuple[str, FileAnalysis]]:
|
| 678 |
+
"""
|
| 679 |
+
Prioritize files for modernization based on analysis.
|
| 680 |
+
|
| 681 |
+
Args:
|
| 682 |
+
analyses: Dictionary of file analyses
|
| 683 |
+
|
| 684 |
+
Returns:
|
| 685 |
+
Sorted list of (file_path, analysis) tuples, highest priority first
|
| 686 |
+
"""
|
| 687 |
+
# Define priority weights
|
| 688 |
+
severity_weights = {
|
| 689 |
+
PatternSeverity.CRITICAL: 100,
|
| 690 |
+
PatternSeverity.HIGH: 75,
|
| 691 |
+
PatternSeverity.MEDIUM: 50,
|
| 692 |
+
PatternSeverity.LOW: 25,
|
| 693 |
+
PatternSeverity.INFO: 10
|
| 694 |
+
}
|
| 695 |
+
|
| 696 |
+
def calculate_priority_score(analysis: FileAnalysis) -> float:
|
| 697 |
+
"""Calculate priority score for an analysis."""
|
| 698 |
+
# Base score from overall priority
|
| 699 |
+
base_score = severity_weights.get(analysis.overall_priority, 50)
|
| 700 |
+
|
| 701 |
+
# Add points for each pattern weighted by severity and confidence
|
| 702 |
+
pattern_score = sum(
|
| 703 |
+
severity_weights.get(p.severity, 25) * p.confidence
|
| 704 |
+
for p in analysis.patterns
|
| 705 |
+
)
|
| 706 |
+
|
| 707 |
+
# Factor in modernization score (lower = higher priority)
|
| 708 |
+
modernization_penalty = (100 - analysis.modernization_score) / 10
|
| 709 |
+
|
| 710 |
+
return base_score + pattern_score + modernization_penalty
|
| 711 |
+
|
| 712 |
+
# Sort by priority score
|
| 713 |
+
prioritized = sorted(
|
| 714 |
+
analyses.items(),
|
| 715 |
+
key=lambda x: calculate_priority_score(x[1]),
|
| 716 |
+
reverse=True
|
| 717 |
+
)
|
| 718 |
+
|
| 719 |
+
return prioritized
|
| 720 |
+
|
| 721 |
+
def generate_report(self, analyses: Dict[str, FileAnalysis]) -> str:
|
| 722 |
+
"""
|
| 723 |
+
Generate human-readable report from analyses.
|
| 724 |
+
|
| 725 |
+
Args:
|
| 726 |
+
analyses: Dictionary of file analyses
|
| 727 |
+
|
| 728 |
+
Returns:
|
| 729 |
+
Formatted report string
|
| 730 |
+
"""
|
| 731 |
+
report = []
|
| 732 |
+
report.append("=" * 80)
|
| 733 |
+
report.append("INTELLIGENT PATTERN MATCHING REPORT")
|
| 734 |
+
report.append("=" * 80)
|
| 735 |
+
report.append("")
|
| 736 |
+
|
| 737 |
+
# Summary statistics
|
| 738 |
+
total_files = len(analyses)
|
| 739 |
+
files_needing_modernization = sum(1 for a in analyses.values() if a.requires_modernization)
|
| 740 |
+
total_patterns = sum(len(a.patterns) for a in analyses.values())
|
| 741 |
+
avg_modernization_score = sum(a.modernization_score for a in analyses.values()) / max(total_files, 1)
|
| 742 |
+
|
| 743 |
+
report.append("SUMMARY:")
|
| 744 |
+
report.append(f" Total Files Analyzed: {total_files}")
|
| 745 |
+
report.append(f" Files Requiring Modernization: {files_needing_modernization}")
|
| 746 |
+
report.append(f" Total Patterns Detected: {total_patterns}")
|
| 747 |
+
report.append(f" Average Modernization Score: {avg_modernization_score:.1f}/100")
|
| 748 |
+
report.append("")
|
| 749 |
+
|
| 750 |
+
# Language breakdown
|
| 751 |
+
language_counts = {}
|
| 752 |
+
for analysis in analyses.values():
|
| 753 |
+
language_counts[analysis.language] = language_counts.get(analysis.language, 0) + 1
|
| 754 |
+
|
| 755 |
+
report.append("LANGUAGES:")
|
| 756 |
+
for lang, count in sorted(language_counts.items(), key=lambda x: x[1], reverse=True):
|
| 757 |
+
report.append(f" {lang}: {count} files")
|
| 758 |
+
report.append("")
|
| 759 |
+
|
| 760 |
+
# Severity breakdown
|
| 761 |
+
severity_counts = {s: 0 for s in PatternSeverity}
|
| 762 |
+
for analysis in analyses.values():
|
| 763 |
+
for pattern in analysis.patterns:
|
| 764 |
+
severity_counts[pattern.severity] += 1
|
| 765 |
+
|
| 766 |
+
report.append("PATTERNS BY SEVERITY:")
|
| 767 |
+
for severity in [PatternSeverity.CRITICAL, PatternSeverity.HIGH,
|
| 768 |
+
PatternSeverity.MEDIUM, PatternSeverity.LOW, PatternSeverity.INFO]:
|
| 769 |
+
count = severity_counts[severity]
|
| 770 |
+
if count > 0:
|
| 771 |
+
report.append(f" {severity.value.upper()}: {count}")
|
| 772 |
+
report.append("")
|
| 773 |
+
|
| 774 |
+
# Top priority files
|
| 775 |
+
prioritized = self.prioritize_files(analyses)[:10]
|
| 776 |
+
report.append("TOP 10 PRIORITY FILES:")
|
| 777 |
+
for i, (file_path, analysis) in enumerate(prioritized, 1):
|
| 778 |
+
report.append(f" {i}. {file_path}")
|
| 779 |
+
report.append(f" Priority: {analysis.overall_priority.value}")
|
| 780 |
+
report.append(f" Modernization Score: {analysis.modernization_score}/100")
|
| 781 |
+
report.append(f" Patterns: {len(analysis.patterns)}")
|
| 782 |
+
|
| 783 |
+
report.append("")
|
| 784 |
+
report.append("=" * 80)
|
| 785 |
+
|
| 786 |
+
return "\n".join(report)
|
| 787 |
+
|
| 788 |
+
def _serialize_analysis(self, analysis: FileAnalysis) -> dict:
|
| 789 |
+
"""Serialize FileAnalysis to dict for caching."""
|
| 790 |
+
return {
|
| 791 |
+
'file_path': analysis.file_path,
|
| 792 |
+
'language': analysis.language,
|
| 793 |
+
'framework': analysis.framework,
|
| 794 |
+
'patterns': [
|
| 795 |
+
{
|
| 796 |
+
'pattern_type': p.pattern_type,
|
| 797 |
+
'severity': p.severity.value,
|
| 798 |
+
'file_path': p.file_path,
|
| 799 |
+
'language': p.language,
|
| 800 |
+
'description': p.description,
|
| 801 |
+
'line_numbers': p.line_numbers,
|
| 802 |
+
'confidence': p.confidence,
|
| 803 |
+
'recommendation': p.recommendation,
|
| 804 |
+
'estimated_effort_hours': p.estimated_effort_hours
|
| 805 |
+
}
|
| 806 |
+
for p in analysis.patterns
|
| 807 |
+
],
|
| 808 |
+
'overall_priority': analysis.overall_priority.value,
|
| 809 |
+
'modernization_score': analysis.modernization_score,
|
| 810 |
+
'requires_modernization': analysis.requires_modernization
|
| 811 |
+
}
|
| 812 |
+
|
| 813 |
+
def _deserialize_analysis(self, data: dict) -> FileAnalysis:
|
| 814 |
+
"""Deserialize dict to FileAnalysis."""
|
| 815 |
+
patterns = [
|
| 816 |
+
DetectedPattern(
|
| 817 |
+
pattern_type=p['pattern_type'],
|
| 818 |
+
severity=PatternSeverity(p['severity']),
|
| 819 |
+
file_path=p['file_path'],
|
| 820 |
+
language=p['language'],
|
| 821 |
+
description=p['description'],
|
| 822 |
+
line_numbers=p['line_numbers'],
|
| 823 |
+
confidence=p['confidence'],
|
| 824 |
+
recommendation=p['recommendation'],
|
| 825 |
+
estimated_effort_hours=p['estimated_effort_hours']
|
| 826 |
+
)
|
| 827 |
+
for p in data['patterns']
|
| 828 |
+
]
|
| 829 |
+
|
| 830 |
+
return FileAnalysis(
|
| 831 |
+
file_path=data['file_path'],
|
| 832 |
+
language=data['language'],
|
| 833 |
+
framework=data['framework'],
|
| 834 |
+
patterns=patterns,
|
| 835 |
+
overall_priority=PatternSeverity(data['overall_priority']),
|
| 836 |
+
modernization_score=data['modernization_score'],
|
| 837 |
+
requires_modernization=data['requires_modernization']
|
| 838 |
+
)
|
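A sketch of using the matcher on its own, producing prioritized results and the text report. The input file contents are placeholders, and this issues real model calls through AIManager, so provider credentials from .env are assumed to be configured.

# Hypothetical sketch exercising IntelligentPatternMatcher from the file above.
from src.agents.pattern_matcher import IntelligentPatternMatcher

matcher = IntelligentPatternMatcher(cache_dir=".pattern_cache")
analyses = matcher.analyze_batch({
    "scripts/report.php": "<?php $r = mysql_query('SELECT 1'); ?>",
    "lib/util.js": "var total = 0;  // pre-ES6 style",
})
# Highest-priority files first, then the human-readable summary report
for path, analysis in matcher.prioritize_files(analyses):
    print(path, analysis.overall_priority.value, analysis.modernization_score)
print(matcher.generate_report(analyses))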
src/agents/test_generator.py
ADDED
|
@@ -0,0 +1,706 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Test Generator - Generates unit tests for code transformations using AI.
Supports multiple AI providers (Gemini, Nebius, OpenAI).
"""

import logging
from typing import Dict, Optional
from pathlib import Path

from src.config import AIManager

logger = logging.getLogger(__name__)


class CodeTestGenerator:
    """
    Generates comprehensive unit tests for code transformations.
    Uses the configured AI provider (default: Gemini 2.5 Flash) to create
    behavioral equivalence tests.

    Note: Renamed from TestGenerator to avoid pytest collection conflicts.
    """

    def __init__(self):
        """Initialize Code Test Generator."""
        # Use centralized AI manager
        self.ai_manager = AIManager()

        logger.info(
            f"CodeTestGenerator initialized with provider: {self.ai_manager.provider_name}, "
            f"model: {self.ai_manager.model_name}"
        )

    def generate_tests(self, original_code: str, modernized_code: str,
                       file_path: str, language: Optional[str] = None) -> str:
        """
        Generate comprehensive unit tests for code transformation.

        Args:
            original_code: Original legacy code
            modernized_code: Modernized code
            file_path: Path to the file
            language: Programming language (auto-detected if not provided)

        Returns:
            Generated test code as string
        """
        logger.info(f"Generating tests for {file_path}")

        # Auto-detect language from file extension if not provided
        if language is None:
            language = self._detect_language(file_path, modernized_code)

        logger.info(f"Detected language: {language}")

        # Language-specific test framework
        framework_map = {
            "python": "pytest",
            "java": "JUnit 5",
            "javascript": "Jest",
            "typescript": "Jest",
            "go": "testing package",
            "ruby": "RSpec",
            "csharp": "xUnit",
            "cpp": "Google Test",
            "kotlin": "JUnit 5",
            "scala": "ScalaTest"
        }

        framework = framework_map.get(language.lower(), "pytest")

        # Truncate code if too long to avoid token limits
        # Increased from 3000 to 8000 to give AI more context
        max_code_length = 8000  # chars per code block
        original_truncated = original_code[:max_code_length] + ("\n\n# ... (truncated)" if len(original_code) > max_code_length else "")
        modernized_truncated = modernized_code[:max_code_length] + ("\n\n# ... (truncated)" if len(modernized_code) > max_code_length else "")

        # Extract module name for proper imports
        module_name = Path(file_path).stem

        # Language-specific setup instructions
        setup_instructions = ""
        import_instructions = ""

        if language == "python":
            # f-string so {module_name} resolves to the actual module name
            setup_instructions = f"""1. **CRITICAL SANDBOX ENVIRONMENT**: Modal Sandbox Execution:
   - Test file location: `/workspace/test_{module_name}.py`
   - IMPORTANT: The test file contains BOTH source code AND tests combined in one file
   - Implementation code is defined first, then test functions use it
   - Start the test file with:
     ```python
     import sys
     import os
     sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
     ```"""
            import_instructions = f'2. Import/Usage: Either "from {module_name} import ..." OR call functions directly (same file)'
        elif language == "java":
            # f-string so {module_name} resolves and the {{ }} escapes render as literal braces
            setup_instructions = f"""1. **CRITICAL SANDBOX ENVIRONMENT**: Modal Sandbox Maven Execution:
   - Source file: `/workspace/{module_name}.java` with package `com.modernizer`
   - Test file: `/workspace/{module_name}Test.java` with package `com.modernizer`
   - Both files are compiled together by Maven in the `/workspace/` directory
   - Use proper JUnit 5 annotations:
     ```java
     package com.modernizer;
     import org.junit.jupiter.api.Test;
     import org.junit.jupiter.api.BeforeEach;
     import static org.junit.jupiter.api.Assertions.*;

     public class {module_name}Test {{
         @BeforeEach
         void setUp() {{
             // Setup code
         }}

         @Test
         void testMethodName() {{
             // Test code with assertions
         }}
     }}
     ```"""
            import_instructions = '2. Package: Use "package com.modernizer;" in both files - no imports needed (same package)'
        elif language in ["javascript", "typescript"]:
            ext = '.ts' if language == 'typescript' else '.js'
            if language == 'typescript':
                import_example = f'import {{ ... }} from "./{module_name}";'
                import_note = "WITHOUT .ts extension (TypeScript resolves automatically)"
            else:
                import_example = f'import {{ ... }} from "./{module_name}.js";'
                import_note = "WITH .js extension (ES modules require explicit extensions)"

            setup_instructions = f"""1. **CRITICAL SANDBOX ENVIRONMENT**: Modal Sandbox Jest Execution:
   - Source file: `/workspace/{module_name}{ext}`
   - Test file: `/workspace/{module_name}.test{ext}`
   - Framework: Jest configured for {'TypeScript (ts-jest preset)' if language == 'typescript' else 'JavaScript (ES modules)'}
   - Use proper module import statements"""
            import_instructions = f'2. Import: Use relative path {import_note}: `{import_example}`'
        else:
            setup_instructions = "1. Ensure proper imports/includes for the sandbox environment."
            import_instructions = "2. Import the module/class to be tested from the same /workspace/ directory."

        prompt = f"""Generate comprehensive unit tests for this code transformation.

FILE: {file_path}
MODULE NAME: {module_name}
LANGUAGE: {language}
TEST FRAMEWORK: {framework}

ORIGINAL CODE (truncated for context):
```{language}
{original_truncated}
```

MODERNIZED CODE (truncated for context):
```{language}
{modernized_truncated}
```

REQUIREMENTS:
{setup_instructions}

{import_instructions}
3. Test behavioral equivalence (same inputs → same outputs)
4. Test edge cases (empty inputs, None/null, invalid types, boundary values)
5. Test error handling and exceptions
6. Use {framework} framework
7. Mock external dependencies (databases, APIs, file system)
8. Include fixtures for common test data
9. Test both success and failure scenarios
10. Add descriptive test names and docstrings
11. Ensure tests are independent and can run in any order
12. Include setup and teardown if needed

SANDBOX FILE STRUCTURE:
- Python: test_{module_name}.py contains BOTH source code and tests combined
- Java: {module_name}.java and {module_name}Test.java in package com.modernizer, compiled together by Maven
- JavaScript: {module_name}.js and {module_name}.test.js (ES modules with "type": "module" in package.json)
- TypeScript: {module_name}.ts and {module_name}.test.ts (ts-jest preset handles compilation)
- All files are in /workspace/ directory in the Modal Sandbox

CRITICAL IMPORT INSTRUCTIONS:
- JavaScript: MUST use .js extension in imports: `import {{ ... }} from "./{module_name}.js";`
- TypeScript: MUST NOT use .ts extension in imports: `import {{ ... }} from "./{module_name}";`
- This is critical - wrong extensions will cause compilation/runtime errors!

CRITICAL OUTPUT INSTRUCTIONS:
- Return ONLY the complete test code in a single code block
- For Python: Source and tests are in SAME file, define functions first then tests
- For Java: Source and tests are SEPARATE files, same package, no imports needed
- For JS/TS: Tests are SEPARATE files, use relative imports with correct extensions (see above)
- DO NOT include any explanatory text, descriptions, or commentary before or after the code
- The response must be executable code that can run directly in a sandbox environment
- Start your response with the code block marker (```{language}) and end with the closing marker (```)
"""
        try:
            response_text = self.ai_manager.generate_content(
                prompt=prompt,
                temperature=AIManager.TEMPERATURE_MEDIUM,
                max_tokens=AIManager.MAX_OUTPUT_TOKENS_LARGE
            )

            # Check if response has text
            if not response_text:
                logger.warning(f"Empty response from AI for {file_path}")
                return self._generate_fallback_test(file_path, language, framework)

            test_code = self._extract_code(response_text)

            # Validate that we got actual test code, not just fallback
            if not test_code or len(test_code.strip()) < 100:
                logger.warning(f"Generated test code too short for {file_path}, using fallback")
                return self._generate_fallback_test(file_path, language, framework)

            # Check if it contains actual test functions
            if language == "python" and "def test_" not in test_code:
                logger.warning(f"No test functions found in generated code for {file_path}")
                return self._generate_fallback_test(file_path, language, framework)

            logger.info(f"Test generation complete for {file_path} ({len(test_code)} chars)")
            return test_code

        except Exception as e:
            logger.error(f"Error generating tests for {file_path}: {e}")
            return self._generate_fallback_test(file_path, language, framework)

    def generate_integration_tests(self, files: Dict[str, str],
                                   language: str = "python") -> str:
        """
        Generate integration tests for multiple related files.

        Args:
            files: Dictionary mapping file paths to their contents
            language: Programming language

        Returns:
            Generated integration test code
        """
        logger.info(f"Generating integration tests for {len(files)} files")

        files_summary = "\n\n".join([
            f"FILE: {path}\n```{language}\n{content[:500]}...\n```"
            for path, content in list(files.items())[:5]
        ])

        prompt = f"""Generate integration tests for these related files.

{files_summary}

REQUIREMENTS:
1. Test interactions between modules
2. Test data flow across components
3. Test end-to-end scenarios
4. Mock external dependencies
5. Include setup and teardown for test environment
6. Test error propagation across modules
7. Ensure tests are comprehensive but maintainable

CRITICAL: Return ONLY the complete test code in a single code block.
DO NOT include any explanatory text, descriptions, or commentary.
The response must be executable code that can run directly in a sandbox.
"""

        try:
            response_text = self.ai_manager.generate_content(
                prompt=prompt,
                temperature=AIManager.TEMPERATURE_MEDIUM,
                max_tokens=AIManager.MAX_OUTPUT_TOKENS_LARGE
            )

            if not response_text:
                logger.warning("Empty response for integration tests")
                return ""

            test_code = self._extract_code(response_text)
            logger.info(f"Integration test generation complete ({len(test_code)} chars)")
            return test_code

        except Exception as e:
            logger.error(f"Error generating integration tests: {e}")
            return ""

    def generate_security_tests(self, file_path: str, code: str,
                                vulnerabilities: list) -> str:
        """
        Generate security-focused tests.

        Args:
            file_path: Path to the file
            code: Code content
            vulnerabilities: List of identified vulnerabilities

        Returns:
            Generated security test code
        """
        logger.info(f"Generating security tests for {file_path}")

        vulns_text = "\n".join([
            f"- {v.get('type', 'Unknown')}: {v.get('description', '')}"
            for v in vulnerabilities
        ])

        # Detect language
        language = self._detect_language(file_path, code)
        framework_map = {
            "python": "pytest",
            "java": "JUnit 5",
            "javascript": "Jest",
            "typescript": "Jest",
            "go": "testing package",
            "ruby": "RSpec",
            "csharp": "xUnit",
            "cpp": "Google Test",
            "kotlin": "JUnit 5",
            "scala": "ScalaTest"
        }
        framework = framework_map.get(language.lower(), "pytest")

        prompt = f"""Generate security-focused tests for this code.

FILE: {file_path}
LANGUAGE: {language}
TEST FRAMEWORK: {framework}

CODE:
```{language}
{code}
```

IDENTIFIED VULNERABILITIES:
{vulns_text}

REQUIREMENTS:
1. Test for SQL injection prevention
2. Test for XSS prevention
3. Test for authentication/authorization
4. Test for input validation
5. Test for secure credential handling
6. Test for proper error handling (no info leakage)
7. Use {framework} framework
8. Include security-specific assertions

CRITICAL: Return ONLY the complete test code in a single code block.
DO NOT include any explanatory text, descriptions, or commentary.
The response must be executable code that can run directly in a sandbox.
"""

        try:
            response_text = self.ai_manager.generate_content(
                prompt=prompt,
                temperature=AIManager.TEMPERATURE_PRECISE,
                max_tokens=AIManager.MAX_OUTPUT_TOKENS_LARGE
            )

            if not response_text:
                logger.warning(f"Empty response for security tests: {file_path}")
                return ""

            test_code = self._extract_code(response_text)
            logger.info(f"Security test generation complete for {file_path} ({len(test_code)} chars)")
            return test_code

        except Exception as e:
            logger.error(f"Error generating security tests: {e}")
            return ""

    def generate_performance_tests(self, file_path: str, code: str) -> str:
        """
        Generate performance/benchmark tests.

        Args:
            file_path: Path to the file
            code: Code content

        Returns:
            Generated performance test code
        """
        logger.info(f"Generating performance tests for {file_path}")

        # Detect language
        language = self._detect_language(file_path, code)
        framework_map = {
            "python": "pytest-benchmark",
            "java": "JMH (Java Microbenchmark Harness)",
            "javascript": "Jest (with performance hooks)",
            "typescript": "Jest (with performance hooks)",
            "go": "testing package benchmarks",
            "ruby": "Benchmark module",
            "csharp": "BenchmarkDotNet",
            "cpp": "Google Benchmark",
        }
        framework = framework_map.get(language.lower(), "pytest-benchmark")

        prompt = f"""Generate performance tests for this code.

FILE: {file_path}
LANGUAGE: {language}
TEST FRAMEWORK: {framework}

CODE:
```{language}
{code}
```

REQUIREMENTS:
1. Use {framework} for performance testing
2. Test execution time for critical functions
3. Test memory usage
4. Test scalability with different input sizes
5. Include baseline performance metrics
6. Test for performance regressions
7. Add timeout tests for long-running operations

CRITICAL: Return ONLY the complete test code in a single code block.
DO NOT include any explanatory text, descriptions, or commentary.
The response must be executable code that can run directly in a sandbox.
"""

        try:
            response_text = self.ai_manager.generate_content(
                prompt=prompt,
                temperature=AIManager.TEMPERATURE_PRECISE,
                max_tokens=AIManager.MAX_OUTPUT_TOKENS_LARGE
            )

            if not response_text:
                logger.warning(f"Empty response for performance tests: {file_path}")
                return ""

            test_code = self._extract_code(response_text)
            logger.info(f"Performance test generation complete for {file_path} ({len(test_code)} chars)")
            return test_code

        except Exception as e:
            logger.error(f"Error generating performance tests: {e}")
            return ""

    def _extract_code(self, text: str) -> str:
        """
        Extract code from markdown code blocks, removing any explanatory text.

        Args:
            text: Text that may contain markdown code blocks

        Returns:
            Extracted code only, without explanatory text
        """
        # Handle None or empty text
        if not text:
            return ""

        # Try to extract from markdown code blocks
        if "```" in text:
            parts = text.split("```")

            # Find all code blocks
            code_blocks = []
            for i in range(1, len(parts), 2):  # Code blocks are at odd indices
                if i < len(parts):
                    code_block = parts[i]
                    lines = code_block.split('\n')

                    # Remove language identifier if present
                    first_line = lines[0].strip().lower()
                    if first_line in ['python', 'java', 'javascript', 'typescript', 'pytest', 'py', 'js', 'ts', 'go', 'ruby', 'rb']:
                        code_block = '\n'.join(lines[1:])

                    extracted = code_block.strip()

                    # Only add substantial code blocks
                    if len(extracted) > 50:
                        code_blocks.append(extracted)

            # Return the largest code block (usually the main test file)
            if code_blocks:
                return max(code_blocks, key=len)

        # If no code blocks found, check if the text itself looks like code
        # (starts with import, def, class, etc.)
        text_stripped = text.strip()
        code_indicators = ['import ', 'from ', 'def ', 'class ', 'async def ', '@pytest', '@test']

        # If text starts with code indicators, it might be plain code without markdown
        if any(text_stripped.startswith(indicator) for indicator in code_indicators):
            return text_stripped

        # Otherwise, return empty string to trigger fallback
        return ""

    def _detect_language(self, file_path: str, code: str) -> str:
        """
        Detect programming language from file extension or code content.

        Args:
            file_path: Path to the file
            code: Source code content

        Returns:
            Detected language name
        """
        if file_path:
            ext = Path(file_path).suffix.lower()
            extension_map = {
                # Python
                '.py': 'python', '.pyw': 'python', '.pyx': 'python',
                # Java
                '.java': 'java',
                # JavaScript/TypeScript
                '.js': 'javascript', '.jsx': 'javascript', '.mjs': 'javascript', '.cjs': 'javascript',
                '.ts': 'typescript', '.tsx': 'typescript',
                # PHP
                '.php': 'php', '.php3': 'php', '.php4': 'php', '.php5': 'php', '.phtml': 'php',
                # Ruby
                '.rb': 'ruby', '.rbw': 'ruby',
                # Go
                '.go': 'go',
                # C/C++
                '.c': 'c', '.h': 'c',
                '.cpp': 'cpp', '.cc': 'cpp', '.cxx': 'cpp', '.c++': 'cpp',
                '.hpp': 'cpp', '.hh': 'cpp', '.hxx': 'cpp', '.h++': 'cpp',
                # C#
                '.cs': 'csharp',
                # Rust
                '.rs': 'rust',
                # Kotlin
                '.kt': 'kotlin', '.kts': 'kotlin',
                # Swift
                '.swift': 'swift',
                # Scala
                '.scala': 'scala', '.sc': 'scala',
                # R (suffix is lowercased above, so one key suffices)
                '.r': 'r',
                # Perl
                '.pl': 'perl', '.pm': 'perl', '.t': 'perl', '.pod': 'perl',
                # Shell
                '.sh': 'shell', '.bash': 'shell', '.zsh': 'shell', '.fish': 'shell'
            }
            if ext in extension_map:
                return extension_map[ext]

        # Fallback: detect from code content
        if code:
            if 'public class' in code or 'import java.' in code:
                return 'java'
            elif 'package main' in code or 'func main()' in code:
                return 'go'
            elif 'def ' in code and ('import ' in code or 'from ' in code):
                return 'python'
            elif 'function ' in code or 'const ' in code or 'let ' in code:
                return 'javascript'
            elif 'namespace ' in code and 'using ' in code:
                return 'csharp'
            elif 'fn main()' in code or 'use std::' in code:
                return 'rust'
            elif '<?php' in code:
                return 'php'
            elif 'class ' in code and 'def ' in code and 'end' in code:
                return 'ruby'

        return 'python'  # Default

    def _generate_fallback_test(self, file_path: str, language: str,
                                framework: str) -> str:
        """
        Generate a basic fallback test when generation fails.

        Args:
            file_path: Path to the file
            language: Programming language
            framework: Test framework

        Returns:
            Basic test template
        """
        if language == "python":
            module_name = Path(file_path).stem
            return f"""import sys
import os
# Ensure module can be imported from any directory structure
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import pytest
from unittest.mock import Mock, patch

# Tests for {file_path}
# Note: These are placeholder tests. AI generation failed.
# Please add comprehensive tests based on your code's functionality.

class Test{module_name.title().replace('_', '')}:
    \"\"\"Test suite for {module_name}\"\"\"

    def test_module_imports(self):
        \"\"\"Test that the module can be imported without errors\"\"\"
        try:
            import {module_name}
            assert True
        except ImportError:
            pytest.skip("Module not in path")

    def test_placeholder_basic(self):
        \"\"\"Placeholder test - replace with actual tests\"\"\"
        assert True

    def test_placeholder_edge_cases(self):
        \"\"\"Test edge cases - implement based on your code\"\"\"
        # TODO: Add edge case tests
        assert True

    def test_placeholder_error_handling(self):
        \"\"\"Test error handling - implement based on your code\"\"\"
        # TODO: Add error handling tests
        assert True

# TODO: Add comprehensive tests for {file_path}
# Consider testing:
# - Normal operation with valid inputs
# - Edge cases (empty, None, boundary values)
# - Error conditions and exceptions
# - Integration with other modules
"""
        elif language == "java":
            # Title-case before stripping underscores so "my_module" -> "MyModule",
            # matching the Python fallback above
            class_name = Path(file_path).stem.title().replace('_', '')
            return f"""import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.DisplayName;
import static org.junit.jupiter.api.Assertions.*;

/**
 * Tests for {file_path}
 * Note: These are placeholder tests. AI generation failed.
 * Please add comprehensive tests based on your code's functionality.
 */
class {class_name}Test {{

    @BeforeEach
    void setUp() {{
        // Initialize test fixtures
    }}

    @Test
    @DisplayName("Placeholder test - replace with actual tests")
    void testPlaceholderBasic() {{
        assertTrue(true);
    }}

    @Test
    @DisplayName("Test edge cases - implement based on your code")
    void testEdgeCases() {{
        // TODO: Add edge case tests
        assertTrue(true);
    }}

    @Test
    @DisplayName("Test error handling - implement based on your code")
    void testErrorHandling() {{
        // TODO: Add error handling tests
        assertTrue(true);
    }}
}}

// TODO: Add comprehensive tests for {file_path}
// Consider testing:
// - Normal operation with valid inputs
// - Edge cases (null, empty, boundary values)
// - Exception handling
// - Integration with other classes
"""
        elif language in ("javascript", "typescript"):
            module_name = Path(file_path).stem
            return f"""// Tests for {file_path}
// Note: These are placeholder tests. AI generation failed.
// Please add comprehensive tests based on your code's functionality.

describe('{module_name}', () => {{
    beforeEach(() => {{
        // Initialize test fixtures
    }});

    test('placeholder test - replace with actual tests', () => {{
        expect(true).toBe(true);
    }});

    test('edge cases - implement based on your code', () => {{
        // TODO: Add edge case tests
        expect(true).toBe(true);
    }});

    test('error handling - implement based on your code', () => {{
        // TODO: Add error handling tests
        expect(true).toBe(true);
    }});
}});

// TODO: Add comprehensive tests for {file_path}
// Consider testing:
// - Normal operation with valid inputs
// - Edge cases (null, undefined, empty, boundary values)
// - Error conditions and exceptions
// - Async operations (if applicable)
"""
        else:
            return f"""// Tests for {file_path}
// Language: {language}
// Note: AI test generation failed. Please add tests manually.

// TODO: Add comprehensive tests for {file_path}
"""
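A minimal usage sketch for the class above, assuming a provider key (e.g. GEMINI_API_KEY) is set in the environment; the file name and code snippets are hypothetical:

```python
# Sketch only: assumes a configured provider key; "calc.py" and both
# code strings are hypothetical inputs.
from src.agents.test_generator import CodeTestGenerator

generator = CodeTestGenerator()

original = "def add(a, b): return a + b"
modernized = "def add(a: int, b: int) -> int:\n    return a + b"

# Language is auto-detected from the .py extension, so pytest is selected.
test_code = generator.generate_tests(original, modernized, "calc.py")
print(test_code[:200])
```

On an empty or malformed model response, `generate_tests` falls back to the placeholder template rather than raising, so callers always receive runnable test scaffolding.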
src/agents/transformer.py
ADDED
@@ -0,0 +1,358 @@
"""
Code Transformer - Generates modernized code using AI with RAG.
Supports multiple AI providers (Gemini, Nebius, OpenAI).
"""

import json
import logging
from typing import Dict

from src.config import AIManager

logger = logging.getLogger(__name__)


class CodeTransformer:
    """
    Transforms legacy code to modern equivalents using the configured AI
    provider (default: Gemini 2.5 Flash).
    Integrates with MCP servers for examples and context.
    """

    def __init__(self, mcp_manager=None, search_engine=None):
        """
        Initialize Code Transformer.

        Args:
            mcp_manager: Optional MCPManager instance
            search_engine: Optional CodeSearchEngine instance
        """
        self.mcp_manager = mcp_manager
        self.search_engine = search_engine

        # Use centralized AI manager
        self.ai_manager = AIManager()

        logger.info(
            f"CodeTransformer initialized with provider: {self.ai_manager.provider_name}, "
            f"model: {self.ai_manager.model_name}"
        )

    async def transform_code(self, file_path: str, original_code: str,
                             transformation_plan: Dict) -> str:
        """
        Transform legacy code using the configured AI provider.

        Args:
            file_path: Path to the file being transformed
            original_code: Original code content
            transformation_plan: Plan from analyzer with steps and recommendations

        Returns:
            Modernized code as string
        """
        logger.info(f"Transforming code: {file_path}")

        # Get transformation examples from Memory MCP if available
        examples_text = ""
        if self.mcp_manager:
            try:
                from src.mcp.memory_client import MemoryMCPClient
                memory_client = MemoryMCPClient(self.mcp_manager)

                pattern_type = transformation_plan.get('pattern', '')
                examples = await memory_client.get_transformation_examples(
                    pattern_type,
                    limit=3
                )

                if examples:
                    examples_text = "\n\nSUCCESSFUL TRANSFORMATION EXAMPLES:\n"
                    for i, ex in enumerate(examples, 1):
                        examples_text += f"\nExample {i}:\n"
                        examples_text += f"Before: {ex.get('before', '')[:200]}...\n"
                        examples_text += f"After: {ex.get('after', '')[:200]}...\n"
            except Exception as e:
                logger.warning(f"Could not retrieve transformation examples: {e}")

        # Get similar code from search engine if available
        context_text = ""
        if self.search_engine:
            try:
                similar_files = self.search_engine.find_similar_patterns(
                    f"Modern code similar to {file_path}",
                    top_k=3
                )

                if similar_files:
                    context_text = "\n\nSIMILAR MODERN CODE EXAMPLES:\n"
                    for f in similar_files[:2]:
                        context_text += f"- {f['file_path']}: {f['text_snippet']}\n"
            except Exception as e:
                logger.warning(f"Could not get similar code context: {e}")

        # Build transformation prompt
        prompt = f"""You are an expert code modernization assistant. Transform this legacy code to modern best practices.

FILE: {file_path}

TRANSFORMATION PLAN:
{json.dumps(transformation_plan, indent=2)}

{examples_text}
{context_text}

ORIGINAL CODE:
```
{original_code}
```

SANDBOX EXECUTION CONTEXT (for reference when generating imports):
- This code will be tested in Modal Sandbox at /workspace/
- Python: Tests will be combined with source in test_<module>.py
- Java: Source in <Module>.java (package: com.modernizer), tests in <Module>Test.java
- JavaScript: Source in <module>.js (ES modules with Jest), tests in <module>.test.js
- TypeScript: Source in <module>.ts (CommonJS for Jest/ts-jest), tests in <module>.test.ts
- All files in same /workspace/ directory
- Use relative imports and ensure all external dependencies are available

CRITICAL MODULE SYSTEM RULES:
- TypeScript: Use CommonJS-compatible code (NO import.meta, NO top-level await)
- TypeScript: Jest uses ts-jest with module: "commonjs" - avoid ES module-only features
- JavaScript: Can use ES modules but avoid Node.js-specific ES module features
- Do NOT add CLI execution code (if __name__ == "__main__", import.meta.url checks, etc.)
- Focus on library/module code that can be imported and tested

REQUIREMENTS:
1. Apply the transformation plan exactly
2. Maintain behavioral equivalence (same inputs → same outputs)
3. Add type hints for all functions (Python) or appropriate types
4. Include docstrings for public functions
5. Follow language-specific style guides (PEP 8 for Python, Java conventions, etc.)
6. Add error handling where missing
7. Use environment variables for secrets/credentials
8. Add comments explaining complex logic
9. Ensure all imports are at the top
10. Remove unused imports and variables
11. Use correct relative paths for local imports (same directory imports)
12. Include necessary package declarations (Java) or module exports
13. CRITICAL: Export ALL types, interfaces, enums, and classes that might be used in tests
    - TypeScript: Use 'export' keyword for all public types, interfaces, enums, classes
    - JavaScript: Include all functions/classes in module.exports or export statements
    - Python: All public functions/classes should be importable
    - Java: Use public access modifiers for classes/methods that will be tested

IMPORTANT:
- Return ONLY the transformed code, no explanations or markdown formatting
- Do NOT include markdown code fences in the response
- Ensure imports work in sandbox environment where all files are in /workspace/
"""

        try:
            # Call AI with configured model
            modernized_code = self.ai_manager.generate_content(
                prompt=prompt,
                temperature=AIManager.TEMPERATURE_MEDIUM,
                max_tokens=AIManager.MAX_OUTPUT_TOKENS_LARGE
            ).strip()

            # Extract code from markdown if present
            modernized_code = self._extract_code(modernized_code)

            # Validate that code is complete (not truncated)
            if modernized_code:
                # Check for common truncation indicators
                last_lines = modernized_code.split('\n')[-5:]
                last_text = '\n'.join(last_lines)

                # Warn if code appears truncated
                if (not modernized_code.rstrip().endswith((')', '}', ']', '"', "'")) and
                        len(modernized_code) > 1000 and
                        not any(keyword in last_text for keyword in ['if __name__', 'main()', 'return'])):
                    logger.warning(f"Code for {file_path} may be truncated (length: {len(modernized_code)} chars)")
                    logger.warning(f"Last few lines: {last_text[:200]}")

            # Store successful transformation as example
            if self.mcp_manager:
                try:
                    from src.mcp.memory_client import MemoryMCPClient
                    memory_client = MemoryMCPClient(self.mcp_manager)

                    example = {
                        "pattern": transformation_plan.get('pattern', ''),
                        "before": original_code[:500],
                        "after": modernized_code[:500],
                        "file_path": file_path
                    }

                    example_id = f"{transformation_plan.get('pattern', 'unknown')}_{hash(file_path)}"
                    await memory_client.store_transformation_example(example_id, example)
                except Exception as e:
                    logger.warning(f"Could not store transformation example: {e}")

            logger.info(f"Transformation complete for {file_path}")
            return modernized_code

        except Exception as e:
            logger.error(f"Error during transformation: {e}")
            return original_code  # Return original on error

    def _extract_code(self, text: str) -> str:
        """
        Extract code from markdown code blocks if present.
        Handles both complete blocks and trailing markdown fences.

        Args:
            text: Text that may contain markdown code blocks

        Returns:
            Extracted code
        """
        if not text:
            return ""

        # Check for markdown code blocks
        if "```" in text:
            # Try to extract code between ``` markers
            parts = text.split("```")
            if len(parts) >= 3:
                # Get the code block (skip language identifier)
                code_block = parts[1]
                # Remove language identifier if present
                lines = code_block.split('\n')
                if lines[0].strip() in ['python', 'java', 'javascript', 'typescript', 'cpp', 'c', 'go', 'js', 'ts', 'py']:
                    code_block = '\n'.join(lines[1:])
                return code_block.strip()
            elif len(parts) == 2:
                # Only one ``` found - might be trailing fence
                # Take everything before the fence
                return parts[0].strip()

        # Remove any trailing markdown fences
        text = text.strip()
        if text.endswith('```'):
            text = text[:-3].strip()

        return text

    async def bulk_transform(self, files: Dict[str, str],
                             transformation_plan: Dict) -> Dict[str, str]:
        """
        Transform multiple files with the same pattern.

        Args:
            files: Dictionary mapping file paths to their contents
            transformation_plan: Transformation plan to apply

        Returns:
            Dictionary mapping file paths to transformed code
        """
        logger.info(f"Bulk transforming {len(files)} files")

        results = {}

        for file_path, original_code in files.items():
            try:
                transformed = await self.transform_code(
                    file_path,
                    original_code,
                    transformation_plan
                )
                results[file_path] = transformed
                logger.info(f"✓ Transformed {file_path}")
            except Exception as e:
                logger.error(f"✗ Failed to transform {file_path}: {e}")
                results[file_path] = original_code

        # Failed files keep their original code, so every input is processed
        logger.info(f"Bulk transformation complete: {len(results)}/{len(files)} files processed")
        return results

    async def add_type_hints(self, file_path: str, code: str) -> str:
        """
        Add type hints to Python code.

        Args:
            file_path: Path to the file
            code: Code content

        Returns:
            Code with type hints added
        """
        logger.info(f"Adding type hints to {file_path}")

        prompt = f"""Add comprehensive type hints to this Python code.

FILE: {file_path}

CODE:
```python
{code}
```

REQUIREMENTS:
1. Add type hints to all function parameters and return types
2. Use typing module for complex types (List, Dict, Optional, etc.)
3. Add type hints to class attributes
4. Maintain all existing functionality
5. Follow PEP 484 type hinting standards

Return ONLY the code with type hints added, no explanations.
"""

        try:
            typed_code = self.ai_manager.generate_content(
                prompt=prompt,
                temperature=AIManager.TEMPERATURE_PRECISE,
                max_tokens=AIManager.MAX_OUTPUT_TOKENS_MEDIUM
            )

            return self._extract_code(typed_code)

        except Exception as e:
            logger.error(f"Error adding type hints: {e}")
            return code

    async def add_docstrings(self, file_path: str, code: str) -> str:
        """
        Add docstrings to code.

        Args:
            file_path: Path to the file
            code: Code content

        Returns:
            Code with docstrings added
        """
        logger.info(f"Adding docstrings to {file_path}")

        prompt = f"""Add comprehensive docstrings to this code.

FILE: {file_path}

CODE:
```
{code}
```

REQUIREMENTS:
1. Add docstrings to all functions and classes
2. Use Google-style or NumPy-style docstrings
3. Include parameter descriptions, return values, and exceptions
4. Add module-level docstring if missing
5. Maintain all existing functionality

Return ONLY the code with docstrings added, no explanations.
"""

        try:
            documented_code = self.ai_manager.generate_content(
                prompt=prompt,
                temperature=AIManager.TEMPERATURE_PRECISE,
                max_tokens=AIManager.MAX_OUTPUT_TOKENS_MEDIUM
            )

            return self._extract_code(documented_code)

        except Exception as e:
            logger.error(f"Error adding docstrings: {e}")
            return code
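Because `transform_code` is a coroutine, callers drive it with asyncio. A sketch assuming a configured provider key; the plan dict and file content are hypothetical but follow the shape the prompt above expects:

```python
# Sketch only: assumes a configured provider key; the plan and source
# string are illustrative inputs.
import asyncio
from src.agents.transformer import CodeTransformer

async def main() -> None:
    transformer = CodeTransformer()  # no MCP manager or search engine attached
    plan = {"pattern": "add_type_hints", "steps": ["annotate all public functions"]}
    modernized = await transformer.transform_code(
        "calc.py",
        "def add(a, b): return a + b",
        plan,
    )
    print(modernized)

asyncio.run(main())
```

Note the failure mode: any provider error returns the original code unchanged, so a failed transformation is a no-op for the pipeline rather than a crash.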
src/config/__init__.py
ADDED
@@ -0,0 +1,10 @@
"""
Configuration module for AI providers (Gemini, Nebius, OpenAI).
"""

from .gemini_config import GeminiConfig
from .gemini_schemas import GeminiSchemas
from .ai_manager import AIManager, AIProvider

__all__ = ['GeminiConfig', 'GeminiSchemas', 'AIManager', 'AIProvider']
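These re-exports are what let the agents above write `from src.config import AIManager` instead of reaching into submodules; a quick sketch:

```python
# Sketch: both import paths resolve to the same class via the re-export above.
from src.config import AIManager
from src.config.ai_manager import AIManager as AIManagerDirect

assert AIManager is AIManagerDirect
```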
src/config/ai_manager.py
ADDED
@@ -0,0 +1,323 @@
| 1 |
+
"""
|
| 2 |
+
Centralized AI Manager for multiple providers.
|
| 3 |
+
Supports Gemini, Nebius Token Factory, and other OpenAI-compatible providers.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import json
|
| 8 |
+
import logging
|
| 9 |
+
from typing import Dict, Any, Optional, List
|
| 10 |
+
from enum import Enum
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
|
| 13 |
+
# Load environment variables
|
| 14 |
+
load_dotenv()
|
| 15 |
+
|
| 16 |
+
logger = logging.getLogger(__name__)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class AIProvider(Enum):
|
| 20 |
+
"""Supported AI providers."""
|
| 21 |
+
GEMINI = "gemini"
|
| 22 |
+
NEBIUS = "nebius"
|
| 23 |
+
OPENAI = "openai"
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class AIManager:
|
| 27 |
+
"""
|
| 28 |
+
Centralized manager for AI API calls across different providers.
|
| 29 |
+
Provides a unified interface regardless of the underlying provider.
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
# Default configurations
|
| 33 |
+
DEFAULT_PROVIDER = "gemini"
|
| 34 |
+
DEFAULT_GEMINI_MODEL = "gemini-2.5-flash"
|
| 35 |
+
DEFAULT_NEBIUS_MODEL = "zai-org/GLM-4.5"
|
| 36 |
+
DEFAULT_OPENAI_MODEL = "gpt-4"
|
| 37 |
+
|
| 38 |
+
# Temperature settings for different use cases
|
| 39 |
+
TEMPERATURE_PRECISE = 0.0 # For JSON schema responses
|
| 40 |
+
TEMPERATURE_LOW = 0.1 # For code generation
|
| 41 |
+
TEMPERATURE_MEDIUM = 0.2 # For transformations
|
| 42 |
+
TEMPERATURE_HIGH = 0.7 # For creative tasks
|
| 43 |
+
|
| 44 |
+
# Token limits
|
| 45 |
+
MAX_OUTPUT_TOKENS_SMALL = 8192
|
| 46 |
+
MAX_OUTPUT_TOKENS_MEDIUM = 16384
|
| 47 |
+
MAX_OUTPUT_TOKENS_LARGE = 32768
|
| 48 |
+
|
| 49 |
+
# Retry settings
|
| 50 |
+
MAX_RETRIES = 3
|
| 51 |
+
RETRY_DELAY = 1.0 # seconds
|
| 52 |
+
|
| 53 |
+
def __init__(self, provider: Optional[str] = None, model: Optional[str] = None):
|
| 54 |
+
"""
|
| 55 |
+
Initialize AI Manager.
|
| 56 |
+
|
| 57 |
+
Args:
|
| 58 |
+
provider: AI provider to use (gemini, nebius, openai).
|
| 59 |
+
If None, reads from AI_PROVIDER env var or uses default.
|
| 60 |
+
model: Model name to use. If None, reads from provider-specific env var.
|
| 61 |
+
"""
|
| 62 |
+
# Determine provider
|
| 63 |
+
self.provider_name = (
|
| 64 |
+
provider or
|
| 65 |
+
os.getenv("AI_PROVIDER", self.DEFAULT_PROVIDER)
|
| 66 |
+
).lower()
|
| 67 |
+
|
| 68 |
+
try:
|
| 69 |
+
self.provider = AIProvider(self.provider_name)
|
| 70 |
+
except ValueError:
|
| 71 |
+
logger.warning(
|
| 72 |
+
f"Unknown provider '{self.provider_name}', falling back to Gemini"
|
| 73 |
+
)
|
| 74 |
+
self.provider = AIProvider.GEMINI
|
| 75 |
+
self.provider_name = "gemini"
|
| 76 |
+
|
| 77 |
+
# Initialize provider-specific client
|
| 78 |
+
if self.provider == AIProvider.GEMINI:
|
| 79 |
+
self._init_gemini(model)
|
| 80 |
+
elif self.provider == AIProvider.NEBIUS:
|
| 81 |
+
self._init_nebius(model)
|
| 82 |
+
elif self.provider == AIProvider.OPENAI:
|
| 83 |
+
self._init_openai(model)
|
| 84 |
+
|
| 85 |
+
logger.info(
|
| 86 |
+
f"AIManager initialized with provider: {self.provider_name}, "
|
| 87 |
+
f"model: {self.model_name}"
|
| 88 |
+
)
|
| 89 |
+
|
| 90 |
+
def _init_gemini(self, model: Optional[str] = None):
|
| 91 |
+
"""Initialize Gemini provider."""
|
| 92 |
+
from google import genai
|
| 93 |
+
|
| 94 |
+
api_key = os.getenv("GEMINI_API_KEY")
|
| 95 |
+
if not api_key:
|
| 96 |
+
raise ValueError(
|
| 97 |
+
"GEMINI_API_KEY not found in environment variables. "
|
| 98 |
+
"Please set it in your .env file."
|
| 99 |
+
)
|
| 100 |
+
|
| 101 |
+
self.model_name = (
|
| 102 |
+
model or
|
| 103 |
+
os.getenv("GEMINI_MODEL", self.DEFAULT_GEMINI_MODEL)
|
| 104 |
+
)
|
| 105 |
+
|
| 106 |
+
self.client = genai.Client(api_key=api_key)
|
| 107 |
+
self.provider_type = "gemini"
|
| 108 |
+
|
| 109 |
+
def _init_nebius(self, model: Optional[str] = None):
|
| 110 |
+
"""Initialize Nebius Token Factory provider (OpenAI-compatible)."""
|
| 111 |
+
from openai import OpenAI
|
| 112 |
+
|
| 113 |
+
api_key = os.getenv("NEBIUS_API_KEY")
|
| 114 |
+
if not api_key:
|
| 115 |
+
raise ValueError(
|
| 116 |
+
"NEBIUS_API_KEY not found in environment variables. "
|
| 117 |
+
"Please set it in your .env file."
|
| 118 |
+
)
|
| 119 |
+
|
| 120 |
+
self.model_name = (
|
| 121 |
+
model or
|
| 122 |
+
os.getenv("NEBIUS_MODEL", self.DEFAULT_NEBIUS_MODEL)
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
self.client = OpenAI(
|
| 126 |
+
base_url="https://api.tokenfactory.nebius.com/v1/",
|
| 127 |
+
api_key=api_key
|
| 128 |
+
)
|
| 129 |
+
self.provider_type = "openai_compatible"
|
| 130 |
+
|
| 131 |
+
def _init_openai(self, model: Optional[str] = None):
|
| 132 |
+
"""Initialize OpenAI provider."""
|
| 133 |
+
from openai import OpenAI
|
| 134 |
+
|
| 135 |
+
api_key = os.getenv("OPENAI_API_KEY")
|
| 136 |
+
if not api_key:
|
| 137 |
+
raise ValueError(
|
| 138 |
+
"OPENAI_API_KEY not found in environment variables. "
|
| 139 |
+
"Please set it in your .env file."
|
| 140 |
+
)
|
| 141 |
+
|
| 142 |
+
self.model_name = (
|
| 143 |
+
model or
|
| 144 |
+
os.getenv("OPENAI_MODEL", self.DEFAULT_OPENAI_MODEL)
|
| 145 |
+
)
|
| 146 |
+
|
| 147 |
+
self.client = OpenAI(api_key=api_key)
|
| 148 |
+
self.provider_type = "openai_compatible"
|
| 149 |
+
|
| 150 |
+
def generate_content(
|
| 151 |
+
self,
|
| 152 |
+
prompt: str,
|
| 153 |
+
temperature: float = TEMPERATURE_LOW,
|
| 154 |
+
max_tokens: int = MAX_OUTPUT_TOKENS_MEDIUM,
|
| 155 |
+
response_format: Optional[str] = None,
|
| 156 |
+
response_schema: Optional[Dict[str, Any]] = None,
|
| 157 |
+
system_prompt: Optional[str] = None
|
| 158 |
+
) -> str:
|
| 159 |
+
"""
|
| 160 |
+
Generate content using the configured AI provider.
|
| 161 |
+
|
| 162 |
+
Args:
|
| 163 |
+
prompt: The prompt to send to the AI
|
| 164 |
+
temperature: Temperature setting (0.0-1.0)
|
| 165 |
+
max_tokens: Maximum output tokens
|
| 166 |
+
response_format: Response format ("json" or None)
|
| 167 |
+
response_schema: JSON schema for structured responses (Gemini format)
|
| 168 |
+
system_prompt: Optional system prompt (for OpenAI-compatible providers)
|
| 169 |
+
|
| 170 |
+
Returns:
|
| 171 |
+
Generated text content
|
| 172 |
+
"""
|
| 173 |
+
if self.provider_type == "gemini":
|
| 174 |
+
return self._generate_gemini(
|
| 175 |
+
prompt, temperature, max_tokens,
|
| 176 |
+
response_format, response_schema
|
| 177 |
+
)
|
| 178 |
+
else: # openai_compatible
|
| 179 |
+
return self._generate_openai_compatible(
|
| 180 |
+
prompt, temperature, max_tokens,
|
| 181 |
+
response_format, system_prompt
|
| 182 |
+
)
|
| 183 |
+
|
| 184 |
+
def _generate_gemini(
|
| 185 |
+
self,
|
| 186 |
+
prompt: str,
|
| 187 |
+
temperature: float,
|
| 188 |
+
max_tokens: int,
|
| 189 |
+
response_format: Optional[str],
|
| 190 |
+
response_schema: Optional[Dict[str, Any]]
|
| 191 |
+
) -> str:
|
| 192 |
+
"""Generate content using Gemini API."""
|
| 193 |
+
config = {
|
| 194 |
+
"temperature": temperature,
|
| 195 |
+
"max_output_tokens": max_tokens,
|
| 196 |
+
"top_p": 0.95,
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
# Add JSON schema if provided
|
| 200 |
+
if response_schema:
|
| 201 |
+
config["response_mime_type"] = "application/json"
|
| 202 |
+
config["response_schema"] = response_schema
|
| 203 |
+
elif response_format == "json":
|
| 204 |
+
config["response_mime_type"] = "application/json"
|
| 205 |
+
|
| 206 |
+
response = self.client.models.generate_content(
|
| 207 |
+
model=self.model_name,
|
| 208 |
+
contents=prompt,
|
| 209 |
+
config=config
|
| 210 |
+
)
|
| 211 |
+
|
| 212 |
+
return response.text
|
| 213 |
+
|
| 214 |
+
def _generate_openai_compatible(
|
| 215 |
+
self,
|
| 216 |
+
prompt: str,
|
| 217 |
+
temperature: float,
|
| 218 |
+
max_tokens: int,
|
| 219 |
+
response_format: Optional[str],
|
| 220 |
+
system_prompt: Optional[str]
|
| 221 |
+
) -> str:
|
| 222 |
+
"""Generate content using OpenAI-compatible API."""
|
| 223 |
+
messages = []
|
| 224 |
+
|
| 225 |
+
# Add system prompt if provided
|
| 226 |
+
if system_prompt:
|
| 227 |
+
messages.append({"role": "system", "content": system_prompt})
|
| 228 |
+
|
| 229 |
+
messages.append({"role": "user", "content": prompt})
|
| 230 |
+
|
| 231 |
+
kwargs = {
|
| 232 |
+
"model": self.model_name,
|
| 233 |
+
"messages": messages,
|
| 234 |
+
"temperature": temperature,
|
| 235 |
+
"max_tokens": max_tokens,
|
| 236 |
+
}
|
| 237 |
+
|
| 238 |
+
# Add JSON mode if requested
|
| 239 |
+
if response_format == "json":
|
| 240 |
+
kwargs["response_format"] = {"type": "json_object"}
|
| 241 |
+
|
| 242 |
+
response = self.client.chat.completions.create(**kwargs)
|
| 243 |
+
|
| 244 |
+
return response.choices[0].message.content
|
| 245 |
+
|
| 246 |
+
def get_base_config(
|
| 247 |
+
self,
|
| 248 |
+
temperature: float = TEMPERATURE_LOW,
|
| 249 |
+
max_tokens: int = MAX_OUTPUT_TOKENS_MEDIUM
|
| 250 |
+
) -> Dict[str, Any]:
|
| 251 |
+
"""
|
| 252 |
+
Get base configuration for AI calls.
|
| 253 |
+
|
| 254 |
+
Args:
|
| 255 |
+
temperature: Temperature setting (0.0-1.0)
|
| 256 |
+
max_tokens: Maximum output tokens
|
| 257 |
+
|
| 258 |
+
Returns:
|
| 259 |
+
Configuration dictionary
|
| 260 |
+
"""
|
| 261 |
+
return {
|
| 262 |
+
"temperature": temperature,
|
| 263 |
+
"max_tokens": max_tokens,
|
| 264 |
+
}
|
| 265 |
+
|
| 266 |
+
def get_json_config(
|
| 267 |
+
self,
|
| 268 |
+
schema: Optional[Dict[str, Any]] = None,
|
| 269 |
+
temperature: float = TEMPERATURE_PRECISE,
|
| 270 |
+
max_tokens: int = MAX_OUTPUT_TOKENS_MEDIUM
|
| 271 |
+
) -> Dict[str, Any]:
|
| 272 |
+
"""
|
| 273 |
+
Get configuration for JSON schema-enforced responses.
|
| 274 |
+
|
| 275 |
+
Args:
|
| 276 |
+
schema: JSON schema dictionary (Gemini format)
|
| 277 |
+
temperature: Temperature setting (default: 0.0 for precision)
|
| 278 |
+
max_tokens: Maximum output tokens
|
| 279 |
+
|
| 280 |
+
Returns:
|
| 281 |
+
Configuration dictionary
|
| 282 |
+
"""
|
| 283 |
+
config = self.get_base_config(temperature, max_tokens)
|
| 284 |
+
config["response_format"] = "json"
|
| 285 |
+
|
| 286 |
+
if schema and self.provider_type == "gemini":
|
| 287 |
+
config["response_schema"] = schema
|
| 288 |
+
|
| 289 |
+
return config
|
| 290 |
+
|
| 291 |
+
@classmethod
|
| 292 |
+
def validate_config(cls) -> bool:
|
| 293 |
+
"""
|
| 294 |
+
Validate that required configuration is present.
|
| 295 |
+
|
| 296 |
+
Returns:
|
| 297 |
+
True if configuration is valid
|
| 298 |
+
|
| 299 |
+
Raises:
|
| 300 |
+
ValueError: If required configuration is missing
|
| 301 |
+
"""
|
| 302 |
+
provider = os.getenv("AI_PROVIDER", cls.DEFAULT_PROVIDER).lower()
|
| 303 |
+
|
| 304 |
+
if provider == "gemini":
|
| 305 |
+
if not os.getenv("GEMINI_API_KEY"):
|
| 306 |
+
raise ValueError(
|
| 307 |
+
"GEMINI_API_KEY not found in environment variables. "
|
| 308 |
+
"Please set it in your .env file."
|
| 309 |
+
)
|
| 310 |
+
elif provider == "nebius":
|
| 311 |
+
if not os.getenv("NEBIUS_API_KEY"):
|
| 312 |
+
raise ValueError(
|
| 313 |
+
"NEBIUS_API_KEY not found in environment variables. "
|
| 314 |
+
"Please set it in your .env file."
|
| 315 |
+
)
|
| 316 |
+
elif provider == "openai":
|
| 317 |
+
if not os.getenv("OPENAI_API_KEY"):
|
| 318 |
+
raise ValueError(
|
| 319 |
+
"OPENAI_API_KEY not found in environment variables. "
|
| 320 |
+
"Please set it in your .env file."
|
| 321 |
+
)
|
| 322 |
+
|
| 323 |
+
return True
|
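A minimal usage sketch for the provider-agnostic `generate_content` path above. It assumes `AIManager` (defined earlier in this file) can be constructed without required arguments and that the matching API key is set in `.env`; both the constructor shape and the import path are assumptions about code not shown in this hunk.

```python
# Hypothetical usage sketch, not part of the committed files.
from src.config.ai_manager import AIManager  # assumed import path

manager = AIManager()  # assumption: provider is chosen via AI_PROVIDER in .env

# Plain text generation with the low-temperature default
text = manager.generate_content("Summarize the risks of eval() in Python.")

# JSON-mode generation; response_schema is only honored on the Gemini path
raw_json = manager.generate_content(
    "Classify this file: print('hello')",
    temperature=0.0,
    response_format="json",
)
print(text)
print(raw_json)
```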
src/config/gemini_config.py
ADDED
@@ -0,0 +1,99 @@
"""
Centralized Gemini API configuration.
Allows users to configure model settings from .env file.
"""

import os
from typing import Optional
from dotenv import load_dotenv

# Load environment variables
load_dotenv()


class GeminiConfig:
    """Centralized configuration for Gemini API."""

    # Default model - can be overridden in .env
    DEFAULT_MODEL = "gemini-2.5-flash"

    # Model configuration from environment
    MODEL_NAME: str = os.getenv("GEMINI_MODEL", DEFAULT_MODEL)
    API_KEY: str = os.getenv("GEMINI_API_KEY", "")

    # Temperature settings for different use cases
    TEMPERATURE_PRECISE = 0.0  # For JSON schema responses
    TEMPERATURE_LOW = 0.1      # For code generation
    TEMPERATURE_MEDIUM = 0.2   # For transformations
    TEMPERATURE_HIGH = 0.7     # For creative tasks

    # Token limits
    MAX_OUTPUT_TOKENS_SMALL = 8192
    MAX_OUTPUT_TOKENS_MEDIUM = 16384
    MAX_OUTPUT_TOKENS_LARGE = 32768

    # Retry settings
    MAX_RETRIES = 3
    RETRY_DELAY = 1.0  # seconds

    @classmethod
    def validate(cls) -> bool:
        """Validate that required configuration is present."""
        if not cls.API_KEY:
            raise ValueError(
                "GEMINI_API_KEY not found in environment variables. "
                "Please set it in your .env file."
            )
        return True

    @classmethod
    def get_model_name(cls) -> str:
        """Get the configured model name."""
        return cls.MODEL_NAME

    @classmethod
    def get_api_key(cls) -> str:
        """Get the API key."""
        cls.validate()
        return cls.API_KEY

    @classmethod
    def get_base_config(cls, temperature: float = TEMPERATURE_LOW,
                        max_tokens: int = MAX_OUTPUT_TOKENS_MEDIUM) -> dict:
        """
        Get base configuration for Gemini API calls.

        Args:
            temperature: Temperature setting (0.0-1.0)
            max_tokens: Maximum output tokens

        Returns:
            Configuration dictionary
        """
        return {
            "temperature": temperature,
            "max_output_tokens": max_tokens,
            "top_p": 0.95,
        }

    @classmethod
    def get_json_config(cls, schema: dict,
                        temperature: float = TEMPERATURE_PRECISE,
                        max_tokens: int = MAX_OUTPUT_TOKENS_MEDIUM) -> dict:
        """
        Get configuration for JSON schema-enforced responses.

        Args:
            schema: JSON schema dictionary
            temperature: Temperature setting (default: 0.0 for precision)
            max_tokens: Maximum output tokens

        Returns:
            Configuration dictionary with schema enforcement
        """
        config = cls.get_base_config(temperature, max_tokens)
        config.update({
            "response_mime_type": "application/json",
            "response_schema": schema
        })
        return config
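A quick sketch of what the two config helpers above produce. It only exercises the classmethods shown; the toy schema dict and the import path are illustrative assumptions.

```python
# Illustrative sketch of GeminiConfig's helpers; the toy schema is invented.
from src.config.gemini_config import GeminiConfig  # assumed import path

base = GeminiConfig.get_base_config(temperature=0.2)
# -> {"temperature": 0.2, "max_output_tokens": 16384, "top_p": 0.95}

toy_schema = {"type": "OBJECT", "properties": {"ok": {"type": "BOOLEAN"}}}
json_cfg = GeminiConfig.get_json_config(toy_schema)
# -> the base config plus response_mime_type="application/json" and the
#    schema, at TEMPERATURE_PRECISE (0.0) for deterministic structured output
print(base)
print(json_cfg)
```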
src/config/gemini_schemas.py
ADDED
@@ -0,0 +1,261 @@
"""
JSON schemas for Gemini API responses.
Ensures structured, predictable outputs from the AI model.

Note: Uses Google GenAI SDK schema format (uppercase types: STRING, NUMBER, etc.)
"""

from typing import Dict, Any


class GeminiSchemas:
    """Collection of JSON schemas for different response types."""

    @staticmethod
    def language_detection() -> Dict[str, Any]:
        """Schema for language and framework detection."""
        return {
            "type": "OBJECT",
            "properties": {
                "language": {
                    "type": "STRING",
                    "description": "Detected programming language"
                },
                "framework": {
                    "type": "STRING",
                    "description": "Detected framework or empty string if none",
                    "nullable": True
                },
                "confidence": {
                    "type": "NUMBER",
                    "description": "Confidence score between 0.0 and 1.0"
                }
            },
            "required": ["language", "framework", "confidence"]
        }

    @staticmethod
    def pattern_analysis() -> Dict[str, Any]:
        """Schema for pattern analysis results."""
        return {
            "type": "OBJECT",
            "properties": {
                "patterns": {
                    "type": "ARRAY",
                    "items": {
                        "type": "OBJECT",
                        "properties": {
                            "pattern_type": {"type": "STRING"},
                            "severity": {
                                "type": "STRING",
                                "enum": ["critical", "high", "medium", "low", "info"]
                            },
                            "line_numbers": {
                                "type": "ARRAY",
                                "items": {"type": "INTEGER"}
                            },
                            "confidence": {
                                "type": "NUMBER"
                            },
                            "description": {"type": "STRING"},
                            "recommendation": {"type": "STRING"},
                            "estimated_effort_hours": {
                                "type": "NUMBER"
                            }
                        },
                        "required": [
                            "pattern_type", "severity", "line_numbers",
                            "confidence", "description", "recommendation",
                            "estimated_effort_hours"
                        ]
                    }
                },
                "modernization_score": {
                    "type": "INTEGER"
                },
                "requires_modernization": {"type": "BOOLEAN"},
                "overall_priority": {
                    "type": "STRING",
                    "enum": ["critical", "high", "medium", "low", "info"]
                }
            },
            "required": [
                "patterns", "modernization_score",
                "requires_modernization", "overall_priority"
            ]
        }

    @staticmethod
    def batch_pattern_analysis() -> Dict[str, Any]:
        """Schema for batch pattern analysis results."""
        return {
            "type": "OBJECT",
            "properties": {
                "files": {
                    "type": "ARRAY",
                    "items": {
                        "type": "OBJECT",
                        "properties": {
                            "file_path": {"type": "STRING"},
                            "language": {"type": "STRING"},
                            "framework": {
                                "type": "STRING",
                                "nullable": True
                            },
                            "patterns": {
                                "type": "ARRAY",
                                "items": {
                                    "type": "OBJECT",
                                    "properties": {
                                        "pattern_type": {"type": "STRING"},
                                        "severity": {
                                            "type": "STRING",
                                            "enum": ["critical", "high", "medium", "low", "info"]
                                        },
                                        "line_numbers": {
                                            "type": "ARRAY",
                                            "items": {"type": "INTEGER"}
                                        },
                                        "confidence": {
                                            "type": "NUMBER"
                                        },
                                        "description": {"type": "STRING"},
                                        "recommendation": {"type": "STRING"},
                                        "estimated_effort_hours": {
                                            "type": "NUMBER"
                                        }
                                    },
                                    "required": [
                                        "pattern_type", "severity", "line_numbers",
                                        "confidence", "description", "recommendation",
                                        "estimated_effort_hours"
                                    ]
                                }
                            },
                            "modernization_score": {
                                "type": "INTEGER"
                            },
                            "requires_modernization": {"type": "BOOLEAN"},
                            "overall_priority": {
                                "type": "STRING",
                                "enum": ["critical", "high", "medium", "low", "info"]
                            }
                        },
                        "required": [
                            "file_path", "language", "framework", "patterns",
                            "modernization_score", "requires_modernization",
                            "overall_priority"
                        ]
                    }
                }
            },
            "required": ["files"]
        }

    @staticmethod
    def file_classification() -> Dict[str, Any]:
        """Schema for file classification results."""
        return {
            "type": "OBJECT",
            "properties": {
                "classification": {
                    "type": "STRING",
                    "enum": ["primary", "secondary", "test", "config", "documentation"]
                },
                "confidence": {
                    "type": "NUMBER"
                },
                "reasoning": {"type": "STRING"},
                "language": {"type": "STRING"},
                "framework": {
                    "type": "STRING",
                    "nullable": True
                }
            },
            "required": ["classification", "confidence", "reasoning", "language", "framework"]
        }

    @staticmethod
    def code_analysis() -> Dict[str, Any]:
        """Schema for detailed code analysis."""
        return {
            "type": "OBJECT",
            "properties": {
                "summary": {"type": "STRING"},
                "issues": {
                    "type": "ARRAY",
                    "items": {
                        "type": "OBJECT",
                        "properties": {
                            "type": {"type": "STRING"},
                            "severity": {
                                "type": "STRING",
                                "enum": ["critical", "high", "medium", "low", "info"]
                            },
                            "description": {"type": "STRING"},
                            "line_numbers": {
                                "type": "ARRAY",
                                "items": {"type": "INTEGER"}
                            },
                            "recommendation": {"type": "STRING"}
                        },
                        "required": ["type", "severity", "description", "line_numbers", "recommendation"]
                    }
                },
                "transformation_steps": {
                    "type": "ARRAY",
                    "items": {
                        "type": "OBJECT",
                        "properties": {
                            "step": {"type": "STRING"},
                            "description": {"type": "STRING"},
                            "priority": {
                                "type": "STRING",
                                "enum": ["critical", "high", "medium", "low"]
                            },
                            "estimated_hours": {
                                "type": "NUMBER"
                            }
                        },
                        "required": ["step", "description", "priority", "estimated_hours"]
                    }
                },
                "dependencies": {
                    "type": "ARRAY",
                    "items": {"type": "STRING"}
                },
                "estimated_total_hours": {
                    "type": "NUMBER"
                }
            },
            "required": [
                "summary", "issues", "transformation_steps",
                "dependencies", "estimated_total_hours"
            ]
        }

    @staticmethod
    def test_generation() -> Dict[str, Any]:
        """Schema for test generation metadata."""
        return {
            "type": "OBJECT",
            "properties": {
                "test_framework": {"type": "STRING"},
                "test_count": {
                    "type": "INTEGER"
                },
                "coverage_areas": {
                    "type": "ARRAY",
                    "items": {"type": "STRING"}
                },
                "test_types": {
                    "type": "ARRAY",
                    "items": {
                        "type": "STRING",
                        "enum": ["unit", "integration", "edge_case", "error_handling"]
                    }
                },
                "notes": {"type": "STRING"}
            },
            "required": ["test_framework", "test_count", "coverage_areas", "test_types", "notes"]
        }
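A sketch of wiring one of these schemas into a schema-enforced call, assuming the `AIManager` from `src/config/ai_manager.py` above is configured for the Gemini provider (the schema is ignored on the OpenAI-compatible path). The import paths are assumptions.

```python
# Sketch: a schema from GeminiSchemas driving structured Gemini output.
import json
from src.config.ai_manager import AIManager          # assumed import path
from src.config.gemini_schemas import GeminiSchemas  # assumed import path

manager = AIManager()  # assumption: AI_PROVIDER=gemini in .env
raw = manager.generate_content(
    "Detect the language and framework of: const app = express();",
    temperature=0.0,
    response_format="json",
    response_schema=GeminiSchemas.language_detection(),
)
result = json.loads(raw)  # schema guarantees language/framework/confidence keys
print(result["language"], result["confidence"])
```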
src/mcp/__init__.py
ADDED
@@ -0,0 +1,9 @@
"""
MCP (Model Context Protocol) integration module.
Manages connections to multiple MCP servers.
"""

# Avoid circular import by not importing at module level
# Import these when needed in your code instead

__all__ = ['MCPManager', 'MemoryMCPClient', 'SearchMCPClient', 'GitHubMCPClient']
src/mcp/github_client.py
ADDED
@@ -0,0 +1,407 @@
"""
GitHub MCP Client - Creates PRs using the GitHub MCP server.
Phase 5: Automated PR creation with comprehensive documentation.
"""

import os
import logging
import time
import json
from typing import Dict, List, Optional

# Lazy imports to avoid circular dependency issues
ClientSession = None
StdioServerParameters = None
stdio_client = None


def _ensure_mcp_imports():
    """Lazily load MCP imports to avoid a circular dependency."""
    global ClientSession, StdioServerParameters, stdio_client
    if ClientSession is None:
        from mcp import ClientSession as CS, StdioServerParameters as SSP
        from mcp.client.stdio import stdio_client as sc
        ClientSession = CS
        StdioServerParameters = SSP
        stdio_client = sc


logger = logging.getLogger(__name__)


class GitHubMCPClient:
    """
    GitHub MCP client for automated PR creation.
    Uses the Model Context Protocol to interact with GitHub.
    """

    def __init__(self, github_token: Optional[str] = None):
        """
        Initialize the GitHub MCP client.

        Args:
            github_token: Optional GitHub token. If not provided, uses GITHUB_TOKEN from the environment.
        """
        self.github_token = github_token or os.getenv("GITHUB_TOKEN")
        if not self.github_token:
            logger.warning("GITHUB_TOKEN not set - PR creation will be disabled")

        logger.info("GitHubMCPClient initialized")

    async def create_pr(
        self,
        repo_url: str,
        changed_files: Dict[str, str],
        pr_summary: str,
        test_results: Dict,
        base_branch: str = "main"
    ) -> Dict:
        """
        Create a GitHub PR using MCP.

        Args:
            repo_url: GitHub repository URL (e.g., "owner/repo")
            changed_files: Dictionary mapping file paths to new content
            pr_summary: PR description summary
            test_results: Test execution results
            base_branch: Base branch to merge into

        Returns:
            Dictionary with PR URL and details
        """
        _ensure_mcp_imports()  # Lazily load MCP

        if not self.github_token:
            return {
                "success": False,
                "error": "GITHUB_TOKEN not configured"
            }

        logger.info(f"Creating PR for {repo_url}")

        try:
            # Configure the GitHub MCP server
            server_params = StdioServerParameters(
                command="npx",
                args=["-y", "@modelcontextprotocol/server-github"],
                env={"GITHUB_PERSONAL_ACCESS_TOKEN": self.github_token}
            )

            async with stdio_client(server_params) as (read, write):
                async with ClientSession(read, write) as session:
                    await session.initialize()

                    # Create branch
                    branch_name = f"modernize/auto-{int(time.time())}"
                    logger.info(f"Creating branch: {branch_name}")

                    try:
                        await session.call_tool(
                            "create_branch",
                            arguments={
                                "repo": repo_url,
                                "branch": branch_name,
                                "from_branch": base_branch
                            }
                        )
                    except Exception as e:
                        logger.error(f"Error creating branch: {e}")
                        return {"success": False, "error": f"Branch creation failed: {e}"}

                    # Commit files in batches of 10
                    file_items = list(changed_files.items())
                    for i in range(0, len(file_items), 10):
                        batch = file_items[i:i+10]
                        files_payload = [
                            {"path": path, "content": content}
                            for path, content in batch
                        ]

                        try:
                            await session.call_tool(
                                "push_files",
                                arguments={
                                    "repo": repo_url,
                                    "branch": branch_name,
                                    "files": files_payload,
                                    "message": f"Modernize batch {i//10 + 1}"
                                }
                            )
                        except Exception as e:
                            logger.error(f"Error pushing files: {e}")

                    # Generate a comprehensive PR description
                    pr_description = self._generate_pr_description(
                        pr_summary,
                        test_results,
                        changed_files
                    )

                    # Create the pull request
                    logger.info("Creating pull request")
                    pr_result = await session.call_tool(
                        "create_pull_request",
                        arguments={
                            "repo": repo_url,
                            "title": "[Automated] Modernize codebase",
                            "body": pr_description,
                            "head": branch_name,
                            "base": base_branch,
                            "draft": False
                        }
                    )

                    logger.info(f"PR created successfully: {pr_result}")

                    return {
                        "success": True,
                        "pr_url": pr_result.get("url", ""),
                        "pr_number": pr_result.get("number", 0),
                        "branch": branch_name
                    }

        except Exception as e:
            logger.error(f"Error creating PR: {e}")
            return {
                "success": False,
                "error": str(e)
            }

    def _generate_pr_description(
        self,
        summary: str,
        test_results: Dict,
        changed_files: Dict[str, str]
    ) -> str:
        """
        Generate a comprehensive PR description.

        Args:
            summary: High-level summary
            test_results: Test execution results
            changed_files: Changed files dictionary

        Returns:
            Formatted PR description in Markdown
        """
        # Calculate statistics
        total_files = len(changed_files)
        total_lines_added = sum(content.count('\n') for content in changed_files.values())

        tests_passed = test_results.get('tests_passed', 0)
        tests_run = test_results.get('tests_run', 0)
        pass_rate = (tests_passed / tests_run * 100) if tests_run > 0 else 0
        coverage = test_results.get('coverage_percent', 0)

        description = f"""## 🤖 Auto-generated by Legacy Code Modernizer Agent

## Summary
{summary}

## Key Changes

### Files Modified
- **Total files changed**: {total_files}
- **Lines added**: +{total_lines_added}
- **Modernization patterns applied**: Multiple (see details below)

### Testing Results
✅ **{tests_passed}/{tests_run} tests passed** ({pass_rate:.1f}% pass rate)
- Test coverage: {coverage:.1f}%
- Execution time: {test_results.get('execution_time', 0):.2f}s
- All tests run in an isolated Modal sandbox

## Risk Assessment: **MEDIUM** ⚠️

### Why Medium Risk:
- Automated code transformation requires thorough review
- Database and API changes need integration testing
- Environment variables may need configuration

### Mitigation Steps:
1. ✅ All changes validated in a sandbox environment
2. ✅ Comprehensive test suite generated and passing
3. ✅ Rollback plan included below
4. ⚠️ Manual review recommended before merging

## Deployment Checklist

**Before merging:**
- [ ] Review all file changes
- [ ] Verify environment variables are configured
- [ ] Run integration tests against staging
- [ ] Check for breaking changes in dependencies
- [ ] Update documentation if needed

**After merging:**
- [ ] Monitor application logs for errors
- [ ] Check performance metrics
- [ ] Verify all features work as expected

## Rollback Plan

If issues arise after deployment:

### Immediate Rollback (< 5 minutes)
```bash
# Revert to the previous commit
git revert HEAD
git push origin main
```

### Alternative: Redeploy Previous Version
```bash
# Check out the previous commit
git checkout HEAD~1
# Deploy the previous version
./deploy.sh
```

## Test Details

<details>
<summary>Click to expand test execution logs</summary>

```
{test_results.get('stdout', 'No test output available')[:2000]}
```

</details>

## Changed Files

<details>
<summary>Click to expand file list ({total_files} files)</summary>

{self._format_file_list(changed_files)}

</details>

---

**🙏 Generated with ❤️ by Legacy Code Modernizer**

**Pipeline Time**: {test_results.get('execution_time', 0):.1f}s
**Powered by**: Google Gemini, Nebius AI, LlamaIndex, Modal, MCP

**👥 Reviewers**: Please focus on:
1. Code quality and maintainability
2. Test coverage and edge cases
3. Environment configuration requirements
"""

        return description

    def _format_file_list(self, changed_files: Dict[str, str]) -> str:
        """Format the changed-files list for the PR description."""
        file_list = []
        for i, file_path in enumerate(sorted(changed_files.keys())[:50], 1):
            file_list.append(f"{i}. `{file_path}`")

        if len(changed_files) > 50:
            file_list.append(f"\n... and {len(changed_files) - 50} more files")

        return "\n".join(file_list)

    async def create_issue(
        self,
        repo_url: str,
        title: str,
        body: str,
        labels: Optional[List[str]] = None
    ) -> Dict:
        """
        Create a GitHub issue using MCP.

        Args:
            repo_url: GitHub repository URL
            title: Issue title
            body: Issue description
            labels: Optional list of labels

        Returns:
            Dictionary with issue details
        """
        _ensure_mcp_imports()  # Lazily load MCP

        if not self.github_token:
            return {"success": False, "error": "GITHUB_TOKEN not configured"}

        logger.info(f"Creating issue in {repo_url}")

        try:
            server_params = StdioServerParameters(
                command="npx",
                args=["-y", "@modelcontextprotocol/server-github"],
                env={"GITHUB_PERSONAL_ACCESS_TOKEN": self.github_token}
            )

            async with stdio_client(server_params) as (read, write):
                async with ClientSession(read, write) as session:
                    await session.initialize()

                    result = await session.call_tool(
                        "create_issue",
                        arguments={
                            "repo": repo_url,
                            "title": title,
                            "body": body,
                            "labels": labels or []
                        }
                    )

                    return {
                        "success": True,
                        "issue_url": result.get("url", ""),
                        "issue_number": result.get("number", 0)
                    }

        except Exception as e:
            logger.error(f"Error creating issue: {e}")
            return {"success": False, "error": str(e)}

    async def add_pr_comment(
        self,
        repo_url: str,
        pr_number: int,
        comment: str
    ) -> Dict:
        """
        Add a comment to a PR.

        Args:
            repo_url: GitHub repository URL
            pr_number: PR number
            comment: Comment text

        Returns:
            Success status
        """
        _ensure_mcp_imports()  # Lazily load MCP

        if not self.github_token:
            return {"success": False, "error": "GITHUB_TOKEN not configured"}

        try:
            server_params = StdioServerParameters(
                command="npx",
                args=["-y", "@modelcontextprotocol/server-github"],
                env={"GITHUB_PERSONAL_ACCESS_TOKEN": self.github_token}
            )

            async with stdio_client(server_params) as (read, write):
                async with ClientSession(read, write) as session:
                    await session.initialize()

                    await session.call_tool(
                        "add_comment",
                        arguments={
                            "repo": repo_url,
                            "issue_number": pr_number,
                            "body": comment
                        }
                    )

                    return {"success": True}

        except Exception as e:
            logger.error(f"Error adding comment: {e}")
            return {"success": False, "error": str(e)}
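A sketch of driving `GitHubMCPClient.create_pr` end to end. The repo name and `test_results` payload are illustrative placeholders (the keys match what `_generate_pr_description` reads), and the import path is an assumption.

```python
# Hypothetical driver for the client above; values are placeholders.
import asyncio
from src.mcp.github_client import GitHubMCPClient  # assumed import path

async def main():
    client = GitHubMCPClient()  # reads GITHUB_TOKEN from the environment
    result = await client.create_pr(
        repo_url="owner/repo",  # placeholder
        changed_files={"app.py": "print('modernized')\n"},
        pr_summary="Replace legacy idioms with modern equivalents.",
        test_results={"tests_passed": 12, "tests_run": 12,
                      "coverage_percent": 87.5, "execution_time": 4.2},
    )
    print(result.get("pr_url") if result["success"] else result["error"])

asyncio.run(main())
```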
src/mcp/manager.py
ADDED
@@ -0,0 +1,169 @@
"""
MCP Manager - Central orchestrator for multiple MCP server connections.
"""

import os
import logging
from typing import Dict, Optional
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

logger = logging.getLogger(__name__)


class MCPManager:
    """
    Manages multiple MCP server connections and sessions.
    Provides centralized connection pooling and session management.
    """

    def __init__(self):
        """Initialize MCP Manager."""
        self.servers: Dict[str, StdioServerParameters] = {}
        self.sessions: Dict[str, ClientSession] = {}
        self.active_connections: Dict[str, bool] = {}

        logger.info("MCPManager initialized")

    def register_server(self, name: str, command: str, args: list, env: Optional[Dict] = None):
        """
        Register an MCP server configuration.

        Args:
            name: Unique name for the server
            command: Command to start the server
            args: Arguments for the command
            env: Optional environment variables
        """
        server_params = StdioServerParameters(
            command=command,
            args=args,
            env=env or {}
        )

        self.servers[name] = server_params
        self.active_connections[name] = False

        logger.info(f"Registered MCP server: {name}")

    def register_github_server(self):
        """Register GitHub MCP server."""
        github_token = os.getenv("GITHUB_TOKEN")
        if not github_token:
            logger.warning("GITHUB_TOKEN not set, GitHub MCP will not be available")
            return

        self.register_server(
            name="github",
            command="npx",
            args=["-y", "@modelcontextprotocol/server-github"],
            env={"GITHUB_PERSONAL_ACCESS_TOKEN": github_token}
        )

    def register_tavily_server(self):
        """Register Tavily Search MCP server."""
        tavily_key = os.getenv("TAVILY_API_KEY")
        if not tavily_key:
            logger.warning("TAVILY_API_KEY not set, Tavily MCP will not be available")
            return

        self.register_server(
            name="tavily",
            command="npx",
            args=["-y", "@modelcontextprotocol/server-tavily"],
            env={"TAVILY_API_KEY": tavily_key}
        )

    def register_memory_server(self):
        """Register Memory MCP server."""
        self.register_server(
            name="memory",
            command="npx",
            args=["-y", "@modelcontextprotocol/server-memory"]
        )

    def register_filesystem_server(self, allowed_directories: Optional[list] = None):
        """
        Register Filesystem MCP server.

        Args:
            allowed_directories: List of allowed directories for file access
        """
        args = ["-y", "@modelcontextprotocol/server-filesystem"]

        if allowed_directories:
            args.extend(allowed_directories)

        self.register_server(
            name="filesystem",
            command="npx",
            args=args
        )

    def get_server_params(self, name: str) -> Optional[StdioServerParameters]:
        """
        Get server parameters by name.

        Args:
            name: Server name

        Returns:
            Server parameters or None if not found
        """
        return self.servers.get(name)

    def is_server_registered(self, name: str) -> bool:
        """
        Check if a server is registered.

        Args:
            name: Server name

        Returns:
            True if registered, False otherwise
        """
        return name in self.servers

    def list_servers(self) -> list:
        """
        List all registered servers.

        Returns:
            List of server names
        """
        return list(self.servers.keys())

    async def initialize_all_servers(self):
        """Initialize all registered MCP servers."""
        logger.info("Initializing all MCP servers...")

        for name in self.servers:
            try:
                logger.info(f"Initializing {name} MCP server...")
                # Note: actual initialization happens when clients connect
                self.active_connections[name] = True
            except Exception as e:
                logger.error(f"Failed to initialize {name}: {e}")
                self.active_connections[name] = False

        logger.info("MCP server initialization complete")

    def get_active_servers(self) -> list:
        """
        Get list of active server connections.

        Returns:
            List of active server names
        """
        return [name for name, active in self.active_connections.items() if active]

    def register_all_standard_servers(self):
        """Register all standard MCP servers."""
        logger.info("Registering all standard MCP servers...")

        self.register_github_server()
        self.register_tavily_server()
        self.register_memory_server()
        self.register_filesystem_server()

        logger.info(f"Registered {len(self.servers)} MCP servers")
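A minimal sketch of how a single `MCPManager` is shared by the MCP clients that follow. It only calls methods defined above; the import path is an assumption.

```python
# Sketch: one MCPManager shared by the MCP clients below.
from src.mcp.manager import MCPManager  # assumed import path

mcp = MCPManager()
mcp.register_all_standard_servers()  # github/tavily are skipped if their keys are unset
print(mcp.list_servers())            # e.g. ['github', 'tavily', 'memory', 'filesystem']
print(mcp.is_server_registered("tavily"))
```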
src/mcp/memory_client.py
ADDED
@@ -0,0 +1,202 @@
"""
Memory MCP Client - Store and retrieve analysis results using the Memory MCP server.
"""

import json
import logging
from typing import Dict, Optional, Any
from mcp import ClientSession
from mcp.client.stdio import stdio_client

logger = logging.getLogger(__name__)


class MemoryMCPClient:
    """
    Client for the Memory MCP server to cache analysis results and transformation examples.
    """

    def __init__(self, mcp_manager):
        """
        Initialize the Memory MCP client.

        Args:
            mcp_manager: MCPManager instance
        """
        self.mcp_manager = mcp_manager
        self.server_name = "memory"

        logger.info("MemoryMCPClient initialized")

    async def store_pattern_analysis(self, pattern_id: str, analysis: Dict) -> bool:
        """
        Store pattern analysis in MCP memory.

        Args:
            pattern_id: Unique identifier for the pattern
            analysis: Analysis data to store

        Returns:
            True if successful, False otherwise
        """
        try:
            server_params = self.mcp_manager.get_server_params(self.server_name)
            if not server_params:
                logger.error(f"{self.server_name} MCP server not registered")
                return False

            async with stdio_client(server_params) as (read, write):
                async with ClientSession(read, write) as session:
                    await session.initialize()

                    # Store entity in memory
                    result = await session.call_tool(
                        "store_entity",
                        arguments={
                            "name": f"pattern_{pattern_id}",
                            "content": json.dumps(analysis)
                        }
                    )

                    logger.info(f"Stored pattern analysis: {pattern_id}")
                    return True

        except Exception as e:
            logger.error(f"Error storing pattern analysis: {e}")
            return False

    async def retrieve_pattern_analysis(self, pattern_id: str) -> Optional[Dict]:
        """
        Retrieve cached pattern analysis.

        Args:
            pattern_id: Unique identifier for the pattern

        Returns:
            Analysis data or None if not found
        """
        try:
            server_params = self.mcp_manager.get_server_params(self.server_name)
            if not server_params:
                logger.error(f"{self.server_name} MCP server not registered")
                return None

            async with stdio_client(server_params) as (read, write):
                async with ClientSession(read, write) as session:
                    await session.initialize()

                    # Retrieve entity from memory
                    result = await session.call_tool(
                        "retrieve_entity",
                        arguments={"name": f"pattern_{pattern_id}"}
                    )

                    if result and hasattr(result, 'content'):
                        data = json.loads(result.content[0].text)
                        logger.info(f"Retrieved pattern analysis: {pattern_id}")
                        return data

                    return None

        except Exception as e:
            logger.error(f"Error retrieving pattern analysis: {e}")
            return None

    async def store_transformation_example(self, example_id: str, example: Dict) -> bool:
        """
        Store a successful transformation example.

        Args:
            example_id: Unique identifier for the example
            example: Example data containing before/after code

        Returns:
            True if successful, False otherwise
        """
        try:
            server_params = self.mcp_manager.get_server_params(self.server_name)
            if not server_params:
                logger.error(f"{self.server_name} MCP server not registered")
                return False

            async with stdio_client(server_params) as (read, write):
                async with ClientSession(read, write) as session:
                    await session.initialize()

                    result = await session.call_tool(
                        "store_entity",
                        arguments={
                            "name": f"example_{example_id}",
                            "content": json.dumps(example)
                        }
                    )

                    logger.info(f"Stored transformation example: {example_id}")
                    return True

        except Exception as e:
            logger.error(f"Error storing transformation example: {e}")
            return False

    async def get_transformation_examples(self, pattern_type: str, limit: int = 5) -> list:
        """
        Retrieve transformation examples for a pattern type.

        Args:
            pattern_type: Type of pattern to get examples for
            limit: Maximum number of examples to return

        Returns:
            List of transformation examples
        """
        try:
            server_params = self.mcp_manager.get_server_params(self.server_name)
            if not server_params:
                logger.error(f"{self.server_name} MCP server not registered")
                return []

            async with stdio_client(server_params) as (read, write):
                async with ClientSession(read, write) as session:
                    await session.initialize()

                    # Search for examples matching the pattern type.
                    # Note: this is a simplified implementation; production code
                    # would want more sophisticated querying.
                    examples = []

                    for i in range(limit):
                        try:
                            result = await session.call_tool(
                                "retrieve_entity",
                                arguments={"name": f"example_{pattern_type}_{i}"}
                            )

                            if result and hasattr(result, 'content'):
                                example = json.loads(result.content[0].text)
                                examples.append(example)
                        except Exception:
                            break

                    logger.info(f"Retrieved {len(examples)} transformation examples")
                    return examples

        except Exception as e:
            logger.error(f"Error retrieving transformation examples: {e}")
            return []

    async def clear_cache(self) -> bool:
        """
        Clear all cached data.

        Returns:
            True if successful, False otherwise
        """
        try:
            # Note: the Memory MCP server may not have a clear_all method;
            # this is a placeholder for a future implementation.
            logger.info("Cache cleared (placeholder)")
            return True

        except Exception as e:
            logger.error(f"Error clearing cache: {e}")
            return False
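A sketch of a cache round trip through the client above. The pattern id and payload are invented for illustration, and the import paths are assumptions.

```python
# Hypothetical store/retrieve round trip via MemoryMCPClient.
import asyncio
from src.mcp.manager import MCPManager            # assumed import paths
from src.mcp.memory_client import MemoryMCPClient

async def main():
    mcp = MCPManager()
    mcp.register_memory_server()
    memory = MemoryMCPClient(mcp)

    stored = await memory.store_pattern_analysis(
        "py2_print_stmt",  # invented id
        {"severity": "high", "recommendation": "Use the print() function"},
    )
    if stored:
        cached = await memory.retrieve_pattern_analysis("py2_print_stmt")
        print(cached)

asyncio.run(main())
```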
src/mcp/search_client.py
ADDED
@@ -0,0 +1,247 @@
| 1 |
+
"""
|
| 2 |
+
Search MCP Client - Find migration guides and documentation using Tavily MCP server.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import logging
|
| 6 |
+
from typing import List, Dict, Optional
|
| 7 |
+
from mcp import ClientSession
|
| 8 |
+
from mcp.client.stdio import stdio_client
|
| 9 |
+
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class SearchMCPClient:
|
| 14 |
+
"""
|
| 15 |
+
Client for Tavily Search MCP server to find migration guides and best practices.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
def __init__(self, mcp_manager):
|
| 19 |
+
"""
|
| 20 |
+
Initialize Search MCP client.
|
| 21 |
+
|
| 22 |
+
Args:
|
| 23 |
+
mcp_manager: MCPManager instance
|
| 24 |
+
"""
|
| 25 |
+
self.mcp_manager = mcp_manager
|
| 26 |
+
self.server_name = "tavily"
|
| 27 |
+
|
| 28 |
+
logger.info("SearchMCPClient initialized")
|
| 29 |
+
|
| 30 |
+
async def find_migration_guide(self, from_tech: str, to_tech: str, max_results: int = 5) -> List[Dict]:
|
| 31 |
+
"""
|
| 32 |
+
Find migration documentation for technology upgrade.
|
| 33 |
+
|
| 34 |
+
Args:
|
| 35 |
+
from_tech: Source technology (e.g., "Python 2.7")
|
| 36 |
+
to_tech: Target technology (e.g., "Python 3.12")
|
| 37 |
+
max_results: Maximum number of results to return
|
| 38 |
+
|
| 39 |
+
Returns:
|
| 40 |
+
List of search results with URLs and snippets
|
| 41 |
+
"""
|
| 42 |
+
try:
|
| 43 |
+
server_params = self.mcp_manager.get_server_params(self.server_name)
|
| 44 |
+
if not server_params:
|
| 45 |
+
logger.warning(f"{self.server_name} MCP server not registered, returning empty results")
|
| 46 |
+
return []
|
| 47 |
+
|
| 48 |
+
query = f"{from_tech} to {to_tech} migration guide best practices"
|
| 49 |
+
|
| 50 |
+
async with stdio_client(server_params) as (read, write):
|
| 51 |
+
async with ClientSession(read, write) as session:
|
| 52 |
+
await session.initialize()
|
| 53 |
+
|
| 54 |
+
result = await session.call_tool(
|
| 55 |
+
"search",
|
| 56 |
+
arguments={
|
| 57 |
+
"query": query,
|
| 58 |
+
"max_results": max_results
|
| 59 |
+
}
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
# Parse results
|
| 63 |
+
results = []
|
| 64 |
+
if result and hasattr(result, 'content'):
|
| 65 |
+
for item in result.content:
|
| 66 |
+
if hasattr(item, 'text'):
|
| 67 |
+
results.append({
|
| 68 |
+
'title': item.text.get('title', ''),
|
| 69 |
+
'url': item.text.get('url', ''),
|
| 70 |
+
'snippet': item.text.get('snippet', ''),
|
| 71 |
+
'score': item.text.get('score', 0)
|
| 72 |
+
})
|
| 73 |
+
|
| 74 |
+
logger.info(f"Found {len(results)} migration guides for {from_tech} to {to_tech}")
|
| 75 |
+
return results
|
| 76 |
+
|
| 77 |
+
except Exception as e:
|
| 78 |
+
logger.error(f"Error finding migration guide: {e}")
|
| 79 |
+
return []
|
| 80 |
+
|
| 81 |
+
async def find_library_documentation(self, library_name: str, version: Optional[str] = None) -> List[Dict]:
|
| 82 |
+
"""
|
| 83 |
+
Find official documentation for a library.
|
| 84 |
+
|
| 85 |
+
Args:
|
| 86 |
+
library_name: Name of the library
|
| 87 |
+
version: Optional specific version
|
| 88 |
+
|
| 89 |
+
Returns:
|
| 90 |
+
List of documentation links
|
| 91 |
+
"""
|
| 92 |
+
try:
|
| 93 |
+
server_params = self.mcp_manager.get_server_params(self.server_name)
|
| 94 |
+
if not server_params:
|
| 95 |
+
logger.warning(f"{self.server_name} MCP server not registered, returning empty results")
|
| 96 |
+
return []
|
| 97 |
+
|
| 98 |
+
query = f"{library_name} official documentation"
|
| 99 |
+
if version:
|
| 100 |
+
query += f" version {version}"
|
| 101 |
+
|
| 102 |
+
async with stdio_client(server_params) as (read, write):
|
| 103 |
+
async with ClientSession(read, write) as session:
|
| 104 |
+
await session.initialize()
|
| 105 |
+
|
| 106 |
+
result = await session.call_tool(
|
| 107 |
+
"search",
|
| 108 |
+
arguments={
|
| 109 |
+
"query": query,
|
| 110 |
+
"max_results": 3
|
| 111 |
+
}
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
results = []
|
| 115 |
+
if result and hasattr(result, 'content'):
|
| 116 |
+
for item in result.content:
|
| 117 |
+
if hasattr(item, 'text'):
|
| 118 |
+
results.append({
|
| 119 |
+
'title': item.text.get('title', ''),
|
| 120 |
+
'url': item.text.get('url', ''),
|
| 121 |
+
'snippet': item.text.get('snippet', '')
|
| 122 |
+
})
|
| 123 |
+
|
| 124 |
+
logger.info(f"Found {len(results)} documentation links for {library_name}")
|
| 125 |
+
return results
|
| 126 |
+
|
| 127 |
+
except Exception as e:
|
| 128 |
+
logger.error(f"Error finding library documentation: {e}")
|
| 129 |
+
return []
|
| 130 |
+
|
| 131 |
+
async def find_best_practices(self, topic: str, language: str = "python") -> List[Dict]:
|
| 132 |
+
"""
|
| 133 |
+
Find best practices for a specific topic.
|
| 134 |
+
|
| 135 |
+
Args:
|
| 136 |
+
topic: Topic to search for (e.g., "database connection pooling")
|
| 137 |
+
language: Programming language
|
| 138 |
+
|
| 139 |
+
Returns:
|
| 140 |
+
List of best practice resources
|
| 141 |
+
"""
|
| 142 |
+
try:
|
| 143 |
+
server_params = self.mcp_manager.get_server_params(self.server_name)
|
| 144 |
+
if not server_params:
|
| 145 |
+
logger.warning(f"{self.server_name} MCP server not registered, returning empty results")
|
| 146 |
+
return []
|
| 147 |
+
|
| 148 |
+
query = f"{language} {topic} best practices 2024"
|
| 149 |
+
|
| 150 |
+
async with stdio_client(server_params) as (read, write):
|
| 151 |
+
async with ClientSession(read, write) as session:
|
| 152 |
+
await session.initialize()
|
| 153 |
+
|
| 154 |
+
result = await session.call_tool(
|
| 155 |
+
"search",
|
| 156 |
+
arguments={
|
| 157 |
+
"query": query,
|
| 158 |
+
"max_results": 5
|
| 159 |
+
}
|
| 160 |
+
)
|
| 161 |
+
|
| 162 |
+
results = []
|
| 163 |
+
if result and hasattr(result, 'content'):
|
| 164 |
+
for item in result.content:
|
| 165 |
+
if hasattr(item, 'text'):
|
| 166 |
+
results.append({
|
| 167 |
+
'title': item.text.get('title', ''),
|
| 168 |
+
'url': item.text.get('url', ''),
|
| 169 |
+
'snippet': item.text.get('snippet', '')
|
| 170 |
+
})
|
| 171 |
+
|
| 172 |
+
logger.info(f"Found {len(results)} best practice resources for {topic}")
|
| 173 |
+
return results
|
| 174 |
+
|
| 175 |
+
except Exception as e:
|
| 176 |
+
logger.error(f"Error finding best practices: {e}")
|
| 177 |
+
return []
|
| 178 |
+
|
| 179 |
+
async def find_security_vulnerabilities(self, pattern: str, language: str = "python") -> List[Dict]:
|
| 180 |
+
"""
|
| 181 |
+
Find information about security vulnerabilities in a code pattern.
|
| 182 |
+
|
| 183 |
+
Args:
|
| 184 |
+
pattern: Code pattern to check (e.g., "SQL string interpolation")
|
| 185 |
+
language: Programming language
|
| 186 |
+
|
| 187 |
+
Returns:
|
| 188 |
+
List of security resources
|
| 189 |
+
"""
|
| 190 |
+
try:
|
| 191 |
+
server_params = self.mcp_manager.get_server_params(self.server_name)
|
| 192 |
+
if not server_params:
|
| 193 |
+
logger.warning(f"{self.server_name} MCP server not registered, returning empty results")
|
| 194 |
+
return []
|
| 195 |
+
|
| 196 |
+
query = f"{language} {pattern} security vulnerability CVE"
|
| 197 |
+
|
| 198 |
+
async with stdio_client(server_params) as (read, write):
|
| 199 |
+
async with ClientSession(read, write) as session:
|
| 200 |
+
await session.initialize()
|
| 201 |
+
|
| 202 |
+
result = await session.call_tool(
|
| 203 |
+
"search",
|
| 204 |
+
arguments={
|
| 205 |
+
"query": query,
|
| 206 |
+
"max_results": 5
|
| 207 |
+
}
|
| 208 |
+
)
|
| 209 |
+
|
| 210 |
+
results = []
|
| 211 |
+
if result and hasattr(result, 'content'):
|
| 212 |
+
for item in result.content:
|
| 213 |
+
if hasattr(item, 'text'):
|
| 214 |
+
results.append({
|
| 215 |
+
'title': item.text.get('title', ''),
|
| 216 |
+
'url': item.text.get('url', ''),
|
| 217 |
+
'snippet': item.text.get('snippet', ''),
|
| 218 |
+
'severity': self._extract_severity(item.text.get('snippet', ''))
|
| 219 |
+
})
|
| 220 |
+
|
| 221 |
+
logger.info(f"Found {len(results)} security resources for {pattern}")
|
| 222 |
+
return results
|
| 223 |
+
|
| 224 |
+
except Exception as e:
|
| 225 |
+
logger.error(f"Error finding security vulnerabilities: {e}")
|
| 226 |
+
return []
|
| 227 |
+
|
| 228 |
+
def _extract_severity(self, text: str) -> str:
|
| 229 |
+
"""
|
| 230 |
+
Extract severity level from text.
|
| 231 |
+
|
| 232 |
+
Args:
|
| 233 |
+
text: Text to analyze
|
| 234 |
+
|
| 235 |
+
Returns:
|
| 236 |
+
Severity level (critical, high, medium, low, unknown)
|
| 237 |
+
"""
|
| 238 |
+
text_lower = text.lower()
|
| 239 |
+
if 'critical' in text_lower:
|
| 240 |
+
return 'critical'
|
| 241 |
+
elif 'high' in text_lower:
|
| 242 |
+
return 'high'
|
| 243 |
+
elif 'medium' in text_lower or 'moderate' in text_lower:
|
| 244 |
+
return 'medium'
|
| 245 |
+
elif 'low' in text_lower:
|
| 246 |
+
return 'low'
|
| 247 |
+
return 'unknown'
|
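For orientation, here is a minimal usage sketch (not part of the commit). The class name SearchClient and its constructor wiring are assumptions; the diff above shows only the method bodies.

# Hypothetical driver for the search helpers above; SearchClient and the
# MCPManager constructor arguments are assumed, not confirmed by this diff.
import asyncio
from src.mcp.manager import MCPManager          # module path from this commit
from src.mcp.search_client import SearchClient  # class name assumed

async def main():
    client = SearchClient(MCPManager())  # assumed constructor
    # Each helper degrades gracefully to [] when the MCP server is missing.
    practices = await client.find_best_practices("database connection pooling", "python")
    vulns = await client.find_security_vulnerabilities("SQL string interpolation", "python")
    for v in vulns:
        print(v['severity'], v['title'], v['url'])

asyncio.run(main())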
src/sandbox/__init__.py
ADDED
@@ -0,0 +1,5 @@
"""Sandbox module for secure test execution."""

from .validator import ModalSandboxValidator, app

__all__ = ['ModalSandboxValidator', 'app']
src/sandbox/config.py
ADDED
@@ -0,0 +1,124 @@
"""
Sandbox execution configuration.
Handles environment-specific settings for local vs Hugging Face deployment.
"""

import os
import logging

logger = logging.getLogger(__name__)


def is_huggingface_space() -> bool:
    """Detect if running in the Hugging Face Spaces environment."""
    return os.getenv("SPACE_ID") is not None or os.getenv("SYSTEM") == "spaces"


def is_modal_configured() -> bool:
    """Check if Modal is properly configured with credentials."""
    # Check for a Modal token in the environment
    token_id = os.getenv("MODAL_TOKEN_ID")
    token_secret = os.getenv("MODAL_TOKEN_SECRET")

    # Check if a local Modal config exists
    modal_config_exists = os.path.exists(os.path.expanduser("~/.modal.toml"))

    return bool((token_id and token_secret) or modal_config_exists)


def get_execution_mode() -> str:
    """
    Determine the execution mode based on environment.

    Returns:
        "modal" - Use Modal for execution (required for Hugging Face)
        "local" - Use local subprocess execution
        "auto"  - Try Modal first, fall back to local
    """
    # Explicit mode from environment
    mode = os.getenv("EXECUTION_MODE", "").lower()
    if mode in ("modal", "local", "auto"):
        return mode

    # Auto-detect based on environment
    if is_huggingface_space():
        # Hugging Face Spaces MUST use Modal
        if is_modal_configured():
            logger.info("Hugging Face Spaces detected - using Modal execution")
            return "modal"
        else:
            logger.error("Hugging Face Spaces detected but Modal not configured!")
            logger.error("Set MODAL_TOKEN_ID and MODAL_TOKEN_SECRET environment variables")
            return "modal"  # Still return modal; it will fail with a clear error

    # Local development - try Modal first, fall back to local
    if is_modal_configured():
        return "auto"
    else:
        logger.info("Modal not configured - using local execution")
        return "local"


def should_prefer_modal() -> bool:
    """Determine if Modal should be preferred over local execution."""
    mode = get_execution_mode()

    if mode == "modal":
        return True
    elif mode == "local":
        return False
    else:  # auto
        return is_modal_configured()


def validate_environment() -> bool:
    """
    Validate that the environment is properly configured.
    Logs warnings or errors for configuration issues and returns
    True only when the current mode is usable.
    """
    mode = get_execution_mode()
    is_hf = is_huggingface_space()
    modal_ok = is_modal_configured()

    if is_hf and not modal_ok:
        logger.error("=" * 60)
        logger.error("CONFIGURATION ERROR: Hugging Face Spaces Deployment")
        logger.error("=" * 60)
        logger.error("Modal is REQUIRED for Hugging Face Spaces but not configured.")
        logger.error("")
        logger.error("To fix this:")
        logger.error("1. Get Modal token from: https://modal.com/settings")
        logger.error("2. Set Hugging Face Secrets:")
        logger.error("   - MODAL_TOKEN_ID")
        logger.error("   - MODAL_TOKEN_SECRET")
        logger.error("3. Restart the Space")
        logger.error("=" * 60)
        return False

    if mode == "modal" and not modal_ok:
        logger.warning("Execution mode set to 'modal' but Modal not configured")
        logger.warning("Tests will fail until Modal is configured")
        return False

    if mode == "local" and is_hf:
        logger.warning("Local execution mode on Hugging Face Spaces will not work")
        logger.warning("Change EXECUTION_MODE to 'modal'")
        return False

    # All good
    logger.info(f"Environment validated: mode={mode}, huggingface={is_hf}, modal_configured={modal_ok}")
    return True


# Configuration values
EXECUTION_MODE = get_execution_mode()
PREFER_MODAL = should_prefer_modal()
IS_HUGGINGFACE = is_huggingface_space()
MODAL_CONFIGURED = is_modal_configured()

# Log configuration on import
logger.info("Sandbox Configuration:")
logger.info(f"  Execution Mode: {EXECUTION_MODE}")
logger.info(f"  Prefer Modal: {PREFER_MODAL}")
logger.info(f"  Hugging Face: {IS_HUGGINGFACE}")
logger.info(f"  Modal Configured: {MODAL_CONFIGURED}")
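A minimal sketch (not part of the commit) of how a caller might consume this module; run_in_modal and run_locally are hypothetical placeholders for whatever execution paths the validator wires up. Operators can also force a mode explicitly, e.g. EXECUTION_MODE=local in the environment, which bypasses the auto-detection above.

# Hypothetical consumer of src/sandbox/config - the two runner functions
# below are placeholders, not APIs defined in this commit.
from src.sandbox import config

if not config.validate_environment():
    raise RuntimeError("Sandbox environment is unusable - see the log output above")

if config.PREFER_MODAL:
    result = run_in_modal(code, tests)   # placeholder
else:
    result = run_locally(code, tests)    # placeholder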
src/sandbox/images.py
ADDED
@@ -0,0 +1,122 @@
"""
Modal Container Images for Multi-Language Test Execution.
Defines secure, isolated container images for each supported language.
"""

import logging

logger = logging.getLogger(__name__)

# Try to import Modal
try:
    import modal
    MODAL_AVAILABLE = True
except ImportError:
    MODAL_AVAILABLE = False
    modal = None
    logger.warning("Modal not available - will use local execution only")

# Create Modal app only if available
if MODAL_AVAILABLE:
    app = modal.App("legacy-code-validator")

    # ========================================================================
    # SUPPORTED LANGUAGES (Production Ready)
    # ========================================================================

    python_image = (
        modal.Image.debian_slim()
        .pip_install(
            "pytest>=9.0.0",
            "pytest-cov>=6.0.0",
            "pytest-timeout>=2.3.0",
            "pytest-benchmark>=4.0.0",
            "pytest-mock>=3.12.0"
        )
    )

    java_image = (
        modal.Image.debian_slim()
        .apt_install("openjdk-17-jdk", "maven", "wget")
        .run_commands(
            "mvn --version"
        )
    )

    javascript_image = (
        modal.Image.debian_slim()
        .apt_install(
            "curl", "ca-certificates", "gnupg", "libxt6", "libxmu6", "libxaw7",
            "build-essential", "python3", "git"
        )
        .run_commands(
            # Install Node.js 20.x
            "mkdir -p /etc/apt/keyrings",
            "curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg",
            "echo 'deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main' | tee /etc/apt/sources.list.d/nodesource.list",
            "apt-get update",
            "apt-get install -y nodejs",
            # Pre-install Jest globally for faster test execution
            "npm install -g jest@latest ts-jest@latest typescript@latest @types/jest@latest",
            # Create a working directory and set permissions
            "mkdir -p /workspace",
            "chmod 777 /workspace",
            "node --version",
            "npm --version",
            "jest --version"
        )
    )

    # TypeScript uses the same image as JavaScript
    typescript_image = javascript_image

    # ========================================================================
    # IMAGE REGISTRY
    # ========================================================================

    LANGUAGE_IMAGES = {
        # Supported Languages
        'python': python_image,
        'java': java_image,
        'javascript': javascript_image,
        'typescript': typescript_image
    }

    # Support status for UI display
    LANGUAGE_SUPPORT_STATUS = {
        'python': 'production',
        'java': 'production',
        'javascript': 'production',
        'typescript': 'production'
    }

else:
    # Fallback when Modal is not available
    app = None
    LANGUAGE_IMAGES = {}
    LANGUAGE_SUPPORT_STATUS = {}
    python_image = None
    java_image = None
    javascript_image = None
    typescript_image = None


def get_image_for_language(language: str):
    """Get the appropriate Modal image for a language."""
    if not MODAL_AVAILABLE:
        return None

    return LANGUAGE_IMAGES.get(language.lower())


def get_support_status(language: str) -> str:
    """Get support status for a language: production, experimental, planned, local_only, or unsupported."""
    if not MODAL_AVAILABLE:
        return 'local_only'

    return LANGUAGE_SUPPORT_STATUS.get(language.lower(), 'unsupported')


def is_language_supported(language: str) -> bool:
    """Check if a language is supported in Modal."""
    return language.lower() in LANGUAGE_IMAGES
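As a sketch of how a new language would slot into this registry (illustrative only, not part of the commit; the Go toolchain image and its apt package set are assumptions), the change stays local to this module:

# Hypothetical extension - nothing below exists in the commit.
if MODAL_AVAILABLE:
    go_image = (
        modal.Image.debian_slim()
        .apt_install("golang", "git")
        .run_commands("go version")  # fail the image build early if the toolchain is broken
    )
    LANGUAGE_IMAGES['go'] = go_image
    LANGUAGE_SUPPORT_STATUS['go'] = 'experimental'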
src/sandbox/modal_executor.py
ADDED
@@ -0,0 +1,423 @@
"""
Modal-based test executor using Modal Sandboxes for multi-language support.
Uses the Sandbox.exec() API for more flexible and reliable language execution.
Supports: Python, Java, JavaScript, TypeScript, and more.
"""

import logging
import json
from typing import Dict, List

logger = logging.getLogger(__name__)

# Try to import Modal
try:
    import modal
    import os

    # Configure Modal authentication from environment if available
    token_id = os.getenv("MODAL_TOKEN_ID")
    token_secret = os.getenv("MODAL_TOKEN_SECRET")

    if token_id and token_secret:
        # Set Modal credentials from environment variables
        # This is needed for Hugging Face Spaces deployment
        os.environ["MODAL_TOKEN_ID"] = token_id
        os.environ["MODAL_TOKEN_SECRET"] = token_secret
        logger.info("Modal credentials loaded from environment")

    MODAL_AVAILABLE = True
except ImportError:
    MODAL_AVAILABLE = False
    modal = None
    logger.warning("Modal not available - install with: pip install modal")

if MODAL_AVAILABLE:
    from .images import LANGUAGE_IMAGES

    def _execute_python_in_sandbox(sb: modal.Sandbox, code: str, tests: str,
                                   module_name: str) -> Dict:
        """Execute Python tests in a Modal Sandbox using pytest."""
        try:
            # Ensure the workspace directory exists
            p = sb.exec("mkdir", "-p", "/workspace", timeout=30)
            p.wait()

            # Create a combined test file
            test_content = f"""# Test module
{code}

# Tests
{tests}
"""

            # Upload files to the sandbox
            with sb.open(f"/workspace/test_{module_name}.py", "w") as f:
                f.write(test_content)

            # Run pytest
            p = sb.exec("python", "-m", "pytest", f"/workspace/test_{module_name}.py",
                        "-v", "--tb=short", timeout=120)
            p.wait()

            stdout = p.stdout.read()
            stderr = p.stderr.read()

            logger.info(f"Python test output: {stdout}")

            # Parse results (coarse-grained: the whole run counts as one test)
            success = p.returncode == 0

            return {
                "success": success,
                "tests_run": 1,
                "tests_passed": 1 if success else 0,
                "tests_failed": 0 if success else 1,
                "stdout": stdout,
                "stderr": stderr,
                "execution_mode": "modal",
                "language": "python"
            }
        except Exception as e:
            logger.error(f"Python sandbox execution failed: {e}", exc_info=True)
            return {
                "success": False,
                "error": f"Python execution error: {str(e)}",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "modal",
                "language": "python"
            }

    def _execute_java_in_sandbox(sb: modal.Sandbox, code: str, tests: str,
                                 module_name: str) -> Dict:
        """Execute Java tests in a Modal Sandbox using Maven."""
        try:
            # Maven expects the standard src/main/java and src/test/java layout;
            # writing sources flat into /workspace would compile nothing and let
            # "mvn test" pass vacuously.
            p = sb.exec("mkdir", "-p", "/workspace/src/main/java",
                        "/workspace/src/test/java", timeout=30)
            p.wait()

            # Create pom.xml
            pom_xml = f"""<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>{module_name}</artifactId>
    <version>1.0.0</version>
    <properties>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter</artifactId>
            <version>5.9.0</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.22.2</version>
            </plugin>
        </plugins>
    </build>
</project>"""

            # Upload files to the sandbox
            with sb.open(f"/workspace/src/main/java/{module_name}.java", "w") as f:
                f.write(code)
            with sb.open(f"/workspace/src/test/java/{module_name}Test.java", "w") as f:
                f.write(tests)
            with sb.open("/workspace/pom.xml", "w") as f:
                f.write(pom_xml)

            # Run Maven tests
            p = sb.exec("bash", "-c", "cd /workspace && mvn test -q 2>&1", timeout=120)
            p.wait()

            stdout = p.stdout.read()
            stderr = p.stderr.read()

            logger.info(f"Maven test output: {stdout}")
            if p.returncode == 0:
                return {
                    "success": True,
                    "tests_run": 1,
                    "tests_passed": 1,
                    "tests_failed": 0,
                    "stdout": stdout,
                    "stderr": stderr,
                    "execution_mode": "modal",
                    "language": "java"
                }
            else:
                return {
                    "success": False,
                    "error": f"Tests failed: {stderr}",
                    "tests_run": 1,
                    "tests_passed": 0,
                    "tests_failed": 1,
                    "stdout": stdout,
                    "stderr": stderr,
                    "execution_mode": "modal",
                    "language": "java"
                }
        except Exception as e:
            logger.error(f"Java sandbox execution failed: {e}")
            return {
                "success": False,
                "error": f"Java execution error: {str(e)}",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "modal",
                "language": "java"
            }

    def _execute_javascript_in_sandbox(sb: modal.Sandbox, code: str, tests: str,
                                       module_name: str, language: str = 'javascript') -> Dict:
        """Execute JavaScript/TypeScript tests in a Modal Sandbox using Jest."""
        try:
            # Ensure the workspace directory exists
            p = sb.exec("mkdir", "-p", "/workspace", timeout=30)
            p.wait()
            ext = '.ts' if language == 'typescript' else '.js'

            # Create package.json
            package_json = {
                "name": module_name.replace('_', '-'),
                "version": "1.0.0",
                "description": "Test suite",
                "scripts": {
                    "test": "jest --json"
                },
                "devDependencies": {
                    "jest": "^29.0.0"
                }
            }

            # For JavaScript, use ES modules with a proper Jest config;
            # for TypeScript, use the ts-jest preset
            if language == 'javascript':
                package_json["type"] = "module"
            elif language == 'typescript':
                package_json["devDependencies"]["ts-jest"] = "^29.0.0"
                package_json["devDependencies"]["typescript"] = "^5.0.0"
                package_json["devDependencies"]["@types/jest"] = "^29.0.0"

            # Create the Jest config
            jest_config = {
                "testEnvironment": "node",
                "testMatch": ["**/*.test.js", "**/*.test.ts"]
            }

            if language == 'javascript':
                # Configure Jest for ES modules
                jest_config["transform"] = {}
                jest_config["extensionsToTreatAsEsm"] = [".js"]
            elif language == 'typescript':
                jest_config["preset"] = "ts-jest"
                jest_config["moduleNameMapper"] = {
                    "^(\\.{1,2}/.*)\\.ts$": "$1"
                }

            # Upload files to the sandbox
            with sb.open(f"/workspace/{module_name}{ext}", "w") as f:
                f.write(code)
            with sb.open(f"/workspace/{module_name}.test{ext}", "w") as f:
                f.write(tests)
            with sb.open("/workspace/package.json", "w") as f:
                f.write(json.dumps(package_json, indent=2))
            with sb.open("/workspace/jest.config.json", "w") as f:
                f.write(json.dumps(jest_config, indent=2))

            # For TypeScript, create tsconfig.json
            if language == 'typescript':
                tsconfig = {
                    "compilerOptions": {
                        "target": "ES2020",
                        "module": "commonjs",
                        "lib": ["ES2020"],
                        "strict": True,
                        "esModuleInterop": True,
                        "skipLibCheck": True,
                        "forceConsistentCasingInFileNames": True,
                        "resolveJsonModule": True,
                        "moduleResolution": "node",
                        "types": ["jest", "node"]
                    },
                    "include": ["*.ts"],
                    "exclude": ["node_modules"]
                }
                with sb.open("/workspace/tsconfig.json", "w") as f:
                    f.write(json.dumps(tsconfig, indent=2))

            # Install dependencies and run tests
            p = sb.exec("bash", "-c",
                        "cd /workspace && npm install --legacy-peer-deps && npm test 2>&1",
                        timeout=180)
            p.wait()

            stdout = p.stdout.read()
            stderr = p.stderr.read()

            logger.info(f"Jest test output: {stdout}")

            # Parse the Jest JSON output if available
            try:
                # Extract the JSON report from the output (Jest writes it to stdout)
                lines = stdout.split('\n')
                json_str = None
                for line in lines:
                    if line.strip().startswith('{') and 'numTotalTests' in line:
                        json_str = line
                        break

                if json_str:
                    result = json.loads(json_str)
                    tests_run = result.get('numTotalTests', 0)
                    tests_passed = result.get('numPassedTests', 0)
                    tests_failed = result.get('numFailedTests', 0)
                    success = result.get('success', False)
                else:
                    # No JSON report found - fall back to the exit code
                    tests_run = 1
                    tests_passed = 1 if p.returncode == 0 else 0
                    tests_failed = 0 if p.returncode == 0 else 1
                    success = p.returncode == 0
            except Exception as parse_error:
                logger.warning(f"Could not parse Jest JSON output: {parse_error}")
                tests_run = 1
                tests_passed = 1 if p.returncode == 0 else 0
                tests_failed = 0 if p.returncode == 0 else 1
                success = p.returncode == 0

            return {
                "success": success,
                "tests_run": tests_run,
                "tests_passed": tests_passed,
                "tests_failed": tests_failed,
                "stdout": stdout,
                "stderr": stderr,
                "execution_mode": "modal",
                "language": language
            }
        except Exception as e:
            logger.error(f"JavaScript sandbox execution failed: {e}")
            return {
                "success": False,
                "error": f"{language} execution error: {str(e)}",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "modal",
                "language": language
            }

    def execute_in_modal(code: str, tests: str, requirements: List[str],
                         module_name: str, language: str) -> Dict:
        """
        Execute tests in a Modal Sandbox with proper image configuration.
        Uses Sandbox.exec() for better multi-language support.

        Args:
            code: Source code
            tests: Test code
            requirements: Package requirements
            module_name: Module name
            language: Programming language

        Returns:
            Test execution results
        """
        lang_lower = language.lower()

        if lang_lower not in LANGUAGE_IMAGES:
            return {
                "success": False,
                "error": f"Unsupported language: {language}",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "unsupported",
                "language": language
            }

        try:
            logger.info(f"Executing {language} tests in Modal Sandbox...")

            # Get the appropriate image for this language
            image = LANGUAGE_IMAGES[lang_lower]

            # Create the app for this execution
            app = modal.App.lookup("legacy-code-validator", create_if_missing=True)

            # Create the sandbox with the appropriate image
            with modal.enable_output():
                sb = modal.Sandbox.create(
                    image=image,
                    app=app,
                    timeout=300,
                    cpu=2.0,
                    memory=4096
                )

            try:
                # Dispatch to the language-specific executor
                if lang_lower == 'python':
                    result = _execute_python_in_sandbox(sb, code, tests, module_name)
                elif lang_lower == 'java':
                    result = _execute_java_in_sandbox(sb, code, tests, module_name)
                elif lang_lower in ('javascript', 'typescript'):
                    result = _execute_javascript_in_sandbox(sb, code, tests, module_name, lang_lower)
                else:
                    result = {
                        "success": False,
                        "error": f"No executor for language: {language}",
                        "tests_run": 0,
                        "tests_passed": 0,
                        "tests_failed": 0,
                        "execution_mode": "modal",
                        "language": language
                    }

                result['execution_mode'] = 'modal'
                return result
            finally:
                sb.terminate()

        except Exception as e:
            logger.error(f"Modal sandbox execution failed: {e}", exc_info=True)
            return {
                "success": False,
                "error": f"Modal sandbox error: {str(e)}",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "modal_error",
                "language": language
            }

else:
    # Stub when Modal is not available
    def execute_in_modal(code: str, tests: str, requirements: List[str],
                         module_name: str, language: str) -> Dict:
        """Stub function when Modal is not available."""
        return {
            "success": False,
            "error": "Modal not available",
            "tests_run": 0,
            "tests_passed": 0,
            "tests_failed": 0,
            "execution_mode": "modal_unavailable",
            "language": language
        }
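A minimal usage sketch (not in the commit) exercising execute_in_modal directly with a toy Python module; it assumes Modal credentials are already configured in the environment:

# Hypothetical driver for execute_in_modal; code/tests are toy inputs.
from src.sandbox.modal_executor import execute_in_modal

code = "def add(a, b):\n    return a + b\n"
tests = (
    "def test_add():\n"
    "    assert add(2, 3) == 5\n"
)

# The Python executor concatenates code and tests into one pytest file,
# so the test can reference add() directly.
result = execute_in_modal(code, tests, requirements=[],
                          module_name="calculator", language="python")
print(result["success"], result["execution_mode"])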
src/sandbox/runners/__init__.py
ADDED
@@ -0,0 +1,32 @@
"""
Language-specific test runners for Modal sandbox execution.
Each runner handles project structure, build files, and test execution for its language.
"""

from .python_runner import run_python_tests
from .java_runner import run_java_tests
from .javascript_runner import run_javascript_tests

__all__ = [
    'run_python_tests',
    'run_java_tests',
    'run_javascript_tests',
]

# Registry of all available runners
LANGUAGE_RUNNERS = {
    'python': run_python_tests,
    'java': run_java_tests,
    'javascript': run_javascript_tests,
    'typescript': run_javascript_tests,  # TypeScript uses the JS runner
}


def get_runner_for_language(language: str):
    """Get the appropriate test runner function for a language."""
    return LANGUAGE_RUNNERS.get(language.lower())


def is_runner_available(language: str) -> bool:
    """Check if a test runner is available for a language."""
    return language.lower() in LANGUAGE_RUNNERS
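The registry makes dispatch a one-liner; a hedged sketch (not in the commit) of how a validator might use it:

# Hypothetical dispatch through the runner registry; ts_code/ts_tests are
# placeholders for real inputs.
from src.sandbox.runners import get_runner_for_language

runner = get_runner_for_language("typescript")  # resolves to run_javascript_tests
if runner is None:
    raise ValueError("No runner registered for this language")

# All runners share the (code, tests, requirements, module_name) signature;
# the JS runner additionally accepts language= to distinguish TypeScript.
result = runner(ts_code, ts_tests, [], "calculator", language="typescript")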
src/sandbox/runners/java_runner.py
ADDED
@@ -0,0 +1,350 @@
"""
Java test runner for Modal sandbox execution.
Handles Maven project structure, pom.xml generation, and JUnit 5 execution.
"""

import subprocess
import tempfile
import time
import logging
import re
from pathlib import Path
from typing import Dict, List

logger = logging.getLogger(__name__)


def _extract_class_name(code: str, module_name: str) -> str:
    """Extract the public Java class name from code."""
    match = re.search(r'public\s+class\s+(\w+)', code)
    if match:
        return match.group(1)

    # Fallback: convert module_name to PascalCase
    return ''.join(word.capitalize() for word in module_name.split('_'))


def _create_maven_project(tmpdir: Path, module_name: str, code: str, tests: str) -> str:
    """
    Create a Maven project structure with the standard directory layout.

    Returns:
        Main class name extracted from the code
    """
    # Extract class names
    main_class = _extract_class_name(code, module_name)
    test_class = _extract_class_name(tests, f"{module_name}Test")

    # Create the Maven directory structure
    src_main = tmpdir / "src" / "main" / "java" / "com" / "modernizer"
    src_test = tmpdir / "src" / "test" / "java" / "com" / "modernizer"
    src_main.mkdir(parents=True)
    src_test.mkdir(parents=True)

    # Add package declarations if not present
    if "package " not in code:
        code = "package com.modernizer;\n\n" + code
    if "package " not in tests:
        tests = "package com.modernizer;\n\n" + tests

    # Write source files
    (src_main / f"{main_class}.java").write_text(code, encoding='utf-8')
    (src_test / f"{test_class}.java").write_text(tests, encoding='utf-8')

    # Generate pom.xml
    pom_xml = f"""<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
                             http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.modernizer</groupId>
    <artifactId>{module_name}</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <properties>
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <junit.version>5.10.1</junit.version>
    </properties>

    <dependencies>
        <!-- JUnit 5 -->
        <dependency>
            <groupId>org.junit.jupiter</groupId>
            <artifactId>junit-jupiter</artifactId>
            <version>${{junit.version}}</version>
            <scope>test</scope>
        </dependency>

        <!-- Mockito for mocking -->
        <dependency>
            <groupId>org.mockito</groupId>
            <artifactId>mockito-core</artifactId>
            <version>5.7.0</version>
            <scope>test</scope>
        </dependency>

        <!-- AssertJ fluent assertions -->
        <dependency>
            <groupId>org.assertj</groupId>
            <artifactId>assertj-core</artifactId>
            <version>3.24.2</version>
            <scope>test</scope>
        </dependency>

        <!-- Servlet API -->
        <dependency>
            <groupId>javax.servlet</groupId>
            <artifactId>javax.servlet-api</artifactId>
            <version>4.0.1</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Maven Compiler Plugin -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.11.0</version>
                <configuration>
                    <source>17</source>
                    <target>17</target>
                </configuration>
            </plugin>

            <!-- Maven Surefire Plugin for running tests -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>3.2.2</version>
                <configuration>
                    <includes>
                        <include>**/*Test.java</include>
                    </includes>
                </configuration>
            </plugin>

            <!-- JaCoCo for code coverage -->
            <plugin>
                <groupId>org.jacoco</groupId>
                <artifactId>jacoco-maven-plugin</artifactId>
                <version>0.8.11</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>prepare-agent</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>report</id>
                        <phase>test</phase>
                        <goals>
                            <goal>report</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
"""
    (tmpdir / "pom.xml").write_text(pom_xml, encoding='utf-8')

    return main_class


def _validate_java_tests(tests: str) -> tuple:
    """
    Validate Java test code before execution.

    Returns:
        (is_valid, error_message)
    """
    # Check for JUnit 5 annotations
    if "@Test" not in tests:
        return False, "No @Test annotations found (required for JUnit 5)"

    # Check for JUnit imports
    if "org.junit" not in tests:
        return False, "Missing JUnit imports (import org.junit.jupiter.api.Test)"

    # Check for a test class
    if "class" not in tests:
        return False, "No test class found"

    return True, ""


def run_java_tests(code: str, tests: str, requirements: List[str], module_name: str) -> Dict:
    """
    Run Java tests using Maven and JUnit 5 in a Modal container.

    Args:
        code: Java source code
        tests: JUnit test code
        requirements: List of Maven dependencies (not used currently)
        module_name: Name of the module

    Returns:
        Dictionary with test results
    """
    # Validate tests before execution
    is_valid, error_msg = _validate_java_tests(tests)
    if not is_valid:
        logger.error(f"Test validation failed: {error_msg}")
        return {
            "success": False,
            "error": f"Test validation failed: {error_msg}",
            "tests_run": 0,
            "tests_passed": 0,
            "tests_failed": 0,
            "execution_mode": "modal",
            "language": "java"
        }

    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir_path = Path(tmpdir)

        try:
            # Create the Maven project structure
            class_name = _create_maven_project(tmpdir_path, module_name, code, tests)
            logger.info(f"Created Maven project for class: {class_name}")
        except Exception as e:
            logger.error(f"Failed to create Maven project: {e}")
            return {
                "success": False,
                "error": f"Project setup failed: {str(e)}",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "modal",
                "language": "java"
            }

        start_time = time.time()

        try:
            # Run Maven clean test
            logger.info("Running Maven tests...")
            result = subprocess.run(
                ["mvn", "clean", "test", "-B", "-q"],
                cwd=tmpdir,
                capture_output=True,
                text=True,
                timeout=300  # 5 minutes for Maven
            )
        except subprocess.TimeoutExpired:
            return {
                "success": False,
                "error": "Maven test execution timeout (>5 minutes)",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_time": 300.0,
                "execution_mode": "modal",
                "language": "java"
            }
        except FileNotFoundError:
            return {
                "success": False,
                "error": "Maven (mvn) not found in container",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "modal",
                "language": "java"
            }

        execution_time = time.time() - start_time
        stdout = result.stdout[:10000]  # Truncate to prevent memory issues
        stderr = result.stderr[:10000]

        # Check for compilation/build failures first
        if "BUILD FAILURE" in stdout or "COMPILATION ERROR" in stdout or "BUILD FAILURE" in stderr:
            error_msg = "Maven build failed"
            # Try to extract a specific error
            if "COMPILATION ERROR" in stdout:
                error_msg = "Java compilation error"
            elif "[ERROR]" in stdout:
                # Extract the first error line
                for line in stdout.split('\n'):
                    if '[ERROR]' in line and 'Failed to execute goal' not in line:
                        error_msg = line.strip()
                        break

            return {
                "success": False,
                "error": error_msg,
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "modal",
                "language": "java",
                "stdout": stdout,
                "stderr": stderr
            }

        # Parse the Maven Surefire summary
        # Format: "Tests run: X, Failures: Y, Errors: Z, Skipped: W"
        tests_run = 0
        tests_passed = 0
        tests_failed = 0
        tests_errors = 0
        tests_skipped = 0

        match = re.search(r'Tests run: (\d+),\s*Failures: (\d+),\s*Errors: (\d+),\s*Skipped: (\d+)', stdout)
        if match:
            tests_run = int(match.group(1))
            failures = int(match.group(2))
            tests_errors = int(match.group(3))
            tests_skipped = int(match.group(4))
            tests_failed = failures + tests_errors
            tests_passed = tests_run - tests_failed - tests_skipped
        elif result.returncode == 0:
            # Maven "succeeded" but printed no Surefire summary - no tests ran,
            # which almost certainly means the test class was not picked up
            logger.warning("Maven succeeded but no tests were detected")
            return {
                "success": False,
                "error": "No tests detected by Maven Surefire (missing @Test annotations?)",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "modal",
                "language": "java",
                "stdout": stdout,
                "stderr": stderr
            }

        # Try to extract coverage from the JaCoCo report
        coverage_percent = 0.0
        jacoco_report = tmpdir_path / "target" / "site" / "jacoco" / "index.html"
        if jacoco_report.exists():
            try:
                report_content = jacoco_report.read_text()
                # Extract the coverage percentage from the JaCoCo HTML report
                cov_match = re.search(r'Total.*?(\d+)%', report_content)
                if cov_match:
                    coverage_percent = float(cov_match.group(1))
            except Exception as e:
                logger.warning(f"Failed to parse JaCoCo coverage: {e}")

        return {
            "success": result.returncode == 0,
            "tests_run": tests_run,
            "tests_passed": tests_passed,
            "tests_failed": tests_failed,
            "tests_errors": tests_errors,
            "tests_skipped": tests_skipped,
            "execution_time": round(execution_time, 2),
            "coverage_percent": coverage_percent,
            "stdout": stdout,
            "stderr": stderr,
            "exit_code": result.returncode,
            "execution_mode": "modal",
            "language": "java"
        }
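For reference, the Surefire summary arithmetic above behaves like this on a representative output line (standalone sketch, not part of the commit):

# Standalone illustration of the Surefire summary parsing used in run_java_tests.
import re

line = "Tests run: 6, Failures: 1, Errors: 1, Skipped: 2"
m = re.search(r'Tests run: (\d+),\s*Failures: (\d+),\s*Errors: (\d+),\s*Skipped: (\d+)', line)
run, failures, errors, skipped = (int(g) for g in m.groups())
failed = failures + errors           # 2: assertion failures plus unexpected exceptions
passed = run - failed - skipped      # 2: everything that actually ran and passed
print(run, passed, failed, skipped)  # -> 6 2 2 2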
src/sandbox/runners/javascript_runner.py
ADDED
@@ -0,0 +1,318 @@
"""
JavaScript/TypeScript test runner for Modal sandbox execution.
Handles Node.js project structure, package.json generation, and Jest execution.
"""

import subprocess
import tempfile
import time
import logging
import json
import re
from pathlib import Path
from typing import Dict, List

logger = logging.getLogger(__name__)


def _create_nodejs_project(tmpdir: Path, module_name: str, code: str, tests: str, language: str):
    """
    Create a Node.js project structure with package.json and config files.

    Args:
        tmpdir: Temporary directory path
        module_name: Name of the module
        code: Source code
        tests: Test code
        language: 'javascript' or 'typescript'
    """
    ext = '.ts' if language == 'typescript' else '.js'

    # Write source files
    (tmpdir / f"{module_name}{ext}").write_text(code, encoding='utf-8')
    (tmpdir / f"{module_name}.test{ext}").write_text(tests, encoding='utf-8')

    # Generate package.json
    package_json = {
        "name": module_name.replace('_', '-'),
        "version": "1.0.0",
        "type": "module" if language == 'javascript' else None,
        "description": "Modernized code test suite",
        "main": f"{module_name}{ext}",
        "scripts": {
            "test": "NODE_OPTIONS=--experimental-vm-modules jest --coverage --verbose --no-cache" if language == 'javascript' else "jest --coverage --verbose --no-cache"
        },
        "devDependencies": {
            "jest": "^29.7.0"
        }
    }

    # Remove None values
    package_json = {k: v for k, v in package_json.items() if v is not None}

    if language == 'typescript':
        package_json["devDependencies"].update({
            "typescript": "^5.3.0",
            "ts-jest": "^29.1.0",
            "@types/jest": "^29.5.0",
            "ts-node": "^10.9.0"
        })

        # Generate jest.config.js for TypeScript
        jest_config = """module.exports = {
  preset: 'ts-jest',
  testEnvironment: 'node',
  testMatch: ['**/*.test.ts'],
  collectCoverageFrom: ['*.ts', '!*.test.ts', '!jest.config.js'],
  coverageReporters: ['text', 'text-summary'],
  verbose: true
};
"""
        (tmpdir / "jest.config.js").write_text(jest_config, encoding='utf-8')

        # Generate tsconfig.json
        tsconfig = {
            "compilerOptions": {
                "target": "ES2020",
                "module": "commonjs",
                "lib": ["ES2020"],
                "strict": True,
                "esModuleInterop": True,
                "skipLibCheck": True,
                "forceConsistentCasingInFileNames": True,
                "resolveJsonModule": True,
                "moduleResolution": "node",
                "types": ["jest", "node"]
            },
            "include": ["*.ts"],
            "exclude": ["node_modules"]
        }
        (tmpdir / "tsconfig.json").write_text(json.dumps(tsconfig, indent=2), encoding='utf-8')
    else:
        # Generate the Jest config for JavaScript with ES module support
        jest_config = """module.exports = {
  testEnvironment: 'node',
  testMatch: ['**/*.test.js'],
  collectCoverageFrom: ['*.js', '!*.test.js', '!jest.config.cjs'],
  coverageReporters: ['text', 'text-summary'],
  verbose: true,
  transform: {},
  extensionsToTreatAsEsm: ['.js'],
  moduleNameMapper: {
    '^(\\\\.{1,2}/.*)\\\\.js$': '$1',
  },
};
"""
        # package.json declares "type": "module", so a CommonJS config file
        # must use the .cjs extension or Node will refuse to load it
        (tmpdir / "jest.config.cjs").write_text(jest_config, encoding='utf-8')

    (tmpdir / "package.json").write_text(json.dumps(package_json, indent=2), encoding='utf-8')


def _validate_javascript_tests(tests: str, language: str) -> tuple:
    """
    Validate JavaScript/TypeScript test code before execution.

    Returns:
        (is_valid, error_message)
    """
    # Check for Jest test structure
    if "describe(" not in tests and "test(" not in tests and "it(" not in tests:
        return False, "No Jest test functions found (describe/test/it)"

    # Check for imports
    if "import" not in tests and "require" not in tests:
        return False, "No import/require statements found"

    # Check for expect assertions
    if "expect(" not in tests:
        return False, "No expect() assertions found"

    return True, ""


def run_javascript_tests(code: str, tests: str, requirements: List[str], module_name: str, language: str = 'javascript') -> Dict:
    """
    Run JavaScript/TypeScript tests using Jest in a Modal container.

    Args:
        code: JavaScript/TypeScript source code
        tests: Jest test code
        requirements: List of npm packages to install (not used currently)
        module_name: Name of the module
        language: 'javascript' or 'typescript'

    Returns:
        Dictionary with test results
    """
    # Validate tests before execution
    is_valid, error_msg = _validate_javascript_tests(tests, language)
    if not is_valid:
        logger.error(f"Test validation failed: {error_msg}")
        return {
            "success": False,
            "error": f"Test validation failed: {error_msg}",
            "tests_run": 0,
            "tests_passed": 0,
            "tests_failed": 0,
            "execution_mode": "modal",
            "language": language
        }

    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir_path = Path(tmpdir)

        try:
            # Create the Node.js project structure
            _create_nodejs_project(tmpdir_path, module_name, code, tests, language)
            logger.info(f"Created Node.js project for {module_name} ({language})")
        except Exception as e:
            logger.error(f"Failed to create Node.js project: {e}")
            return {
                "success": False,
                "error": f"Project setup failed: {str(e)}",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "modal",
                "language": language
            }

        start_time = time.time()

        try:
            # Install dependencies
            logger.info("Installing npm dependencies...")
            install_result = subprocess.run(
                ["npm", "install", "--silent", "--no-fund", "--no-audit"],
                cwd=tmpdir,
                capture_output=True,
                text=True,
                timeout=180  # 3 minutes for npm install
            )

            if install_result.returncode != 0:
                logger.error(f"npm install failed with return code: {install_result.returncode}")
                logger.error(f"npm install stderr: {install_result.stderr}")
                logger.error(f"npm install stdout: {install_result.stdout}")
                return {
                    "success": False,
                    "error": f"npm install failed: {install_result.stderr}",
                    "tests_run": 0,
                    "tests_passed": 0,
                    "tests_failed": 0,
                    "execution_mode": "modal",
                    "language": language
                }

            # Run tests
            logger.info("Running Jest tests...")
            result = subprocess.run(
                ["npm", "test", "--", "--ci"],
                cwd=tmpdir,
                capture_output=True,
                text=True,
                timeout=120  # 2 minutes for tests
            )
        except subprocess.TimeoutExpired as e:
            return {
                "success": False,
                "error": f"Test execution timeout: {str(e)}",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_time": round(time.time() - start_time, 2),
                "execution_mode": "modal",
                "language": language
            }
        except FileNotFoundError:
            return {
                "success": False,
                "error": "Node.js/npm not found in container",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "modal",
                "language": language
            }

        execution_time = time.time() - start_time
        stdout = result.stdout[:10000]  # Truncate to prevent memory issues
        stderr = result.stderr[:10000]

        # Parse Jest output - handle all possible formats
        # Jest format examples:
        #   - "Tests: 5 passed, 5 total"
        #   - "Tests: 1 failed, 4 passed, 5 total"
        #   - "Tests: 2 skipped, 3 passed, 5 total"
        #   - "Tests: 1 todo, 4 passed, 5 total"
        #   - "Tests: 0 total"
        tests_run = 0
        tests_passed = 0
        tests_failed = 0
        tests_skipped = 0

        # Look for the "Tests:" summary line
        tests_line_match = re.search(r'Tests:\s+(.+)', stdout)
        if tests_line_match:
|
| 257 |
+
tests_line = tests_line_match.group(1)
|
| 258 |
+
|
| 259 |
+
# Extract total
|
| 260 |
+
total_match = re.search(r'(\d+)\s+total', tests_line)
|
| 261 |
+
if total_match:
|
| 262 |
+
tests_run = int(total_match.group(1))
|
| 263 |
+
|
| 264 |
+
# Extract passed
|
| 265 |
+
passed_match = re.search(r'(\d+)\s+passed', tests_line)
|
| 266 |
+
if passed_match:
|
| 267 |
+
tests_passed = int(passed_match.group(1))
|
| 268 |
+
|
| 269 |
+
# Extract failed
|
| 270 |
+
failed_match = re.search(r'(\d+)\s+failed', tests_line)
|
| 271 |
+
if failed_match:
|
| 272 |
+
tests_failed = int(failed_match.group(1))
|
| 273 |
+
|
| 274 |
+
# Extract skipped
|
| 275 |
+
skipped_match = re.search(r'(\d+)\s+skipped', tests_line)
|
| 276 |
+
if skipped_match:
|
| 277 |
+
tests_skipped = int(skipped_match.group(1))
|
| 278 |
+
|
| 279 |
+
# If we have total but not passed, calculate it
|
| 280 |
+
if tests_run > 0 and tests_passed == 0 and tests_failed == 0:
|
| 281 |
+
tests_passed = tests_run - tests_failed - tests_skipped
|
| 282 |
+
|
| 283 |
+
# Check for test suite failures (compilation errors, etc.)
|
| 284 |
+
if "Test Suites: " in stdout and " failed" in stdout:
|
| 285 |
+
suite_match = re.search(r'Test Suites:\s+(\d+)\s+failed', stdout)
|
| 286 |
+
if suite_match and tests_run == 0:
|
| 287 |
+
return {
|
| 288 |
+
"success": False,
|
| 289 |
+
"error": "Test suite failed to run (compilation/syntax error)",
|
| 290 |
+
"tests_run": 0,
|
| 291 |
+
"tests_passed": 0,
|
| 292 |
+
"tests_failed": 0,
|
| 293 |
+
"execution_mode": "modal",
|
| 294 |
+
"language": language,
|
| 295 |
+
"stdout": stdout,
|
| 296 |
+
"stderr": stderr
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
# Extract coverage percentage
|
| 300 |
+
coverage_percent = 0.0
|
| 301 |
+
# Jest coverage format: "All files | 85.71 | 75 | 100 | 85.71 |"
|
| 302 |
+
cov_match = re.search(r'All files\s*\|\s*([\d.]+)', stdout)
|
| 303 |
+
if cov_match:
|
| 304 |
+
coverage_percent = float(cov_match.group(1))
|
| 305 |
+
|
| 306 |
+
return {
|
| 307 |
+
"success": result.returncode == 0,
|
| 308 |
+
"tests_run": tests_run,
|
| 309 |
+
"tests_passed": tests_passed,
|
| 310 |
+
"tests_failed": tests_failed,
|
| 311 |
+
"execution_time": round(execution_time, 2),
|
| 312 |
+
"coverage_percent": coverage_percent,
|
| 313 |
+
"stdout": stdout,
|
| 314 |
+
"stderr": stderr,
|
| 315 |
+
"exit_code": result.returncode,
|
| 316 |
+
"execution_mode": "modal",
|
| 317 |
+
"language": language
|
| 318 |
+
}
|
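Usage sketch for the runner above (illustrative only, not part of the commit; the module and test strings are hypothetical):

# Run a one-test Jest suite against a tiny ES module through run_javascript_tests.
result = run_javascript_tests(
    code="export function add(a, b) { return a + b; }",
    tests=(
        "import { add } from './calculator.js';\n"
        "test('adds two numbers', () => { expect(add(2, 3)).toBe(5); });"
    ),
    requirements=[],
    module_name="calculator",
    language="javascript",
)
print(result["success"], f"{result['tests_passed']}/{result['tests_run']} passed")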
src/sandbox/runners/python_runner.py
ADDED
@@ -0,0 +1,219 @@
"""
Python test runner for Modal sandbox execution.
Handles pytest execution with proper path setup and result parsing.
"""

import subprocess
import tempfile
import time
import logging
import re
from pathlib import Path
from typing import Dict, List

logger = logging.getLogger(__name__)


def _validate_python_tests(tests: str) -> tuple:
    """
    Validate Python test code before execution.

    Returns:
        (is_valid, error_message)
    """
    # Check for basic pytest structure
    if "def test_" not in tests and "class Test" not in tests:
        return False, "No test functions found (must start with 'test_' or be in 'Test' class)"

    # Check for imports
    if "import" not in tests:
        return False, "No import statements found"

    # Check for basic syntax issues
    try:
        compile(tests, '<string>', 'exec')
    except SyntaxError as e:
        return False, f"Syntax error in test code: {str(e)}"

    return True, ""


def run_python_tests(code: str, tests: str, requirements: List[str], module_name: str) -> Dict:
    """
    Run Python tests using pytest in Modal container.

    Args:
        code: Python source code
        tests: Pytest test code
        requirements: List of pip packages to install
        module_name: Name of the module

    Returns:
        Dictionary with test results
    """
    # Validate tests before execution
    is_valid, error_msg = _validate_python_tests(tests)
    if not is_valid:
        logger.error(f"Test validation failed: {error_msg}")
        return {
            "success": False,
            "error": f"Test validation failed: {error_msg}",
            "tests_run": 0,
            "tests_passed": 0,
            "tests_failed": 0,
            "execution_mode": "modal",
            "language": "python"
        }

    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir_path = Path(tmpdir)

        # Write code and tests in same directory for proper imports
        code_file = tmpdir_path / f"{module_name}.py"
        test_file = tmpdir_path / f"test_{module_name}.py"

        # Ensure tests have proper path setup
        if "sys.path" not in tests and "import sys" not in tests:
            path_setup = """import sys
import os
# Ensure module can be imported
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

"""
            tests = path_setup + tests

        code_file.write_text(code, encoding='utf-8')
        test_file.write_text(tests, encoding='utf-8')

        # Install additional requirements
        if requirements:
            try:
                logger.info(f"Installing requirements: {requirements}")
                install_result = subprocess.run(
                    ["pip", "install", "-q", "--no-cache-dir"] + requirements,
                    capture_output=True,
                    text=True,
                    timeout=120
                )
                if install_result.returncode != 0:
                    logger.warning(f"Some requirements failed to install: {install_result.stderr}")
            except Exception as e:
                logger.warning(f"Failed to install requirements: {e}")

        start_time = time.time()

        try:
            # Run pytest with coverage and verbose output
            result = subprocess.run(
                [
                    "pytest",
                    str(test_file),
                    "-v",
                    "--tb=short",
                    "--timeout=30",
                    "-p", "no:warnings",
                    "--cov=" + module_name,
                    "--cov-report=term-missing"
                ],
                cwd=tmpdir,
                capture_output=True,
                text=True,
                timeout=120
            )
        except subprocess.TimeoutExpired:
            return {
                "success": False,
                "error": "Test execution timeout (>2 minutes)",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_time": 120.0,
                "execution_mode": "modal",
                "language": "python"
            }
        except FileNotFoundError:
            return {
                "success": False,
                "error": "pytest not found in container",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "modal",
                "language": "python"
            }

        execution_time = time.time() - start_time
        stdout = result.stdout[:10000]  # Truncate to prevent memory issues
        stderr = result.stderr[:10000]

        # Parse pytest output from summary line (more reliable than counting)
        # Format: "3 passed, 1 failed, 1 skipped in 0.5s" or "3 passed in 0.5s"
        tests_run = 0
        tests_passed = 0
        tests_failed = 0
        tests_errors = 0
        tests_skipped = 0

        # Look for summary line
        summary_match = re.search(r'=+\s*(.*?)\s+in\s+[\d.]+s\s*=+', stdout)
        if summary_match:
            summary = summary_match.group(1)

            # Parse each component
            passed_match = re.search(r'(\d+)\s+passed', summary)
            if passed_match:
                tests_passed = int(passed_match.group(1))

            failed_match = re.search(r'(\d+)\s+failed', summary)
            if failed_match:
                tests_failed = int(failed_match.group(1))

            error_match = re.search(r'(\d+)\s+error', summary)
            if error_match:
                tests_errors = int(error_match.group(1))

            skipped_match = re.search(r'(\d+)\s+skipped', summary)
            if skipped_match:
                tests_skipped = int(skipped_match.group(1))

            tests_run = tests_passed + tests_failed + tests_errors + tests_skipped

        # Fallback: count individual test results if summary not found
        if tests_run == 0:
            passed = stdout.count(" PASSED\n")
            failed = stdout.count(" FAILED\n")
            errors = stdout.count(" ERROR\n")
            skipped = stdout.count(" SKIPPED\n")
            tests_run = passed + failed + errors
            tests_passed = passed
            tests_failed = failed
            tests_errors = errors
            tests_skipped = skipped

        # Extract coverage percentage from summary
        coverage_percent = 0.0
        # Look for coverage summary: "TOTAL 100 20 80%"
        cov_match = re.search(r'TOTAL\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+)%', stdout)
        if cov_match:
            coverage_percent = float(cov_match.group(1))
        else:
            # Alternative format: "TOTAL 80%"
            cov_match = re.search(r'TOTAL.*?(\d+)%', stdout)
            if cov_match:
                coverage_percent = float(cov_match.group(1))

        return {
            "success": result.returncode == 0,
            "tests_run": tests_run,
            "tests_passed": tests_passed,
            "tests_failed": tests_failed,
            "tests_errors": tests_errors,
            "tests_skipped": tests_skipped,
            "execution_time": round(execution_time, 2),
            "coverage_percent": coverage_percent,
            "stdout": stdout,
            "stderr": stderr,
            "exit_code": result.returncode,
            "execution_mode": "modal",
            "language": "python"
        }
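Usage sketch for the runner above (illustrative only, not part of the commit; the module and test strings are hypothetical):

# Run a one-test pytest suite through run_python_tests and read the result keys
# defined by the return dict above.
result = run_python_tests(
    code="def add(a, b):\n    return a + b\n",
    tests=(
        "from calculator import add\n"
        "def test_add():\n"
        "    assert add(2, 3) == 5\n"
    ),
    requirements=[],
    module_name="calculator",
)
print(f"{result['tests_passed']}/{result['tests_run']} passed, "
      f"coverage={result.get('coverage_percent', 0.0)}%")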
src/sandbox/validator.py
ADDED
@@ -0,0 +1,718 @@
"""
Modal Sandbox Validator - Executes tests in isolated Modal containers.
Phase 5: Test execution in secure sandbox environment.

Supports multiple languages with dedicated Modal container images.
Falls back to local execution when Modal is not available.
"""

import os
import logging
import subprocess
import tempfile
import json
import time
from typing import Dict, List, Optional
from pathlib import Path

# Import Modal images and runners
from .images import (
    MODAL_AVAILABLE, app, LANGUAGE_IMAGES, LANGUAGE_SUPPORT_STATUS,
    get_image_for_language, get_support_status, is_language_supported
)
from .runners import LANGUAGE_RUNNERS, get_runner_for_language, is_runner_available

logger = logging.getLogger(__name__)


def _detect_language(file_path: str, code: str) -> str:
    """Detect programming language from file extension or code content."""
    if file_path:
        ext = Path(file_path).suffix.lower()
        extension_map = {
            # Python
            '.py': 'python', '.pyw': 'python', '.pyx': 'python',
            # Java
            '.java': 'java',
            # JavaScript/TypeScript
            '.js': 'javascript', '.jsx': 'javascript', '.mjs': 'javascript', '.cjs': 'javascript',
            '.ts': 'typescript', '.tsx': 'typescript'
        }
        if ext in extension_map:
            return extension_map[ext]

    # Fallback: detect from code content
    if code:
        if 'public class' in code or 'import java.' in code:
            return 'java'
        elif 'def ' in code and ('import ' in code or 'from ' in code):
            return 'python'
        elif 'function ' in code or 'const ' in code or 'let ' in code:
            return 'javascript'
        elif 'interface ' in code or 'type ' in code:
            return 'typescript'

    return 'python'  # Default


def run_tests_locally(code: str, tests: str, requirements: List[str],
                      module_name: str = "module", language: str = "python") -> Dict:
    """
    Execute tests locally (fallback when Modal is not available).

    Args:
        code: Modernized code to test
        tests: Generated test code
        requirements: Additional packages needed
        module_name: Name of the module
        language: Programming language

    Returns:
        Dictionary with test results
    """
    # Only support Python, Java, JavaScript, and TypeScript
    supported_languages = ['python', 'java', 'javascript', 'typescript']

    if language not in supported_languages:
        return {
            "success": False,
            "error": f"Unsupported language: {language}. Supported languages: {', '.join(supported_languages)}",
            "tests_run": 0,
            "tests_passed": 0,
            "tests_failed": 0,
            "execution_mode": "unsupported"
        }

    if language == 'python':
        return _run_python_tests_locally(code, tests, requirements, module_name)
    elif language == 'java':
        return _run_java_tests_locally(code, tests, module_name)
    elif language in ('javascript', 'typescript'):
        return _run_js_tests_locally(code, tests, module_name, language)


def _run_python_tests_locally(code: str, tests: str, requirements: List[str],
                              module_name: str) -> Dict:
    """Run Python tests locally using pytest."""
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir_path = Path(tmpdir)

        # Write code and tests in same directory for proper imports
        code_file = tmpdir_path / f"{module_name}.py"
        test_file = tmpdir_path / f"test_{module_name}.py"

        # Add sys.path manipulation to tests if not already present
        # This ensures tests can import the module even from subdirectories
        if "sys.path" not in tests and "import sys" not in tests:
            path_setup = """import sys
import os
# Ensure module can be imported
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

"""
            tests = path_setup + tests

        code_file.write_text(code, encoding='utf-8')
        test_file.write_text(tests, encoding='utf-8')

        # Install additional requirements
        if requirements:
            try:
                subprocess.run(
                    ["pip", "install", "-q", "--no-cache-dir"] + requirements,
                    capture_output=True,
                    timeout=60,
                    check=False  # Don't fail on install errors
                )
            except Exception as e:
                logger.warning(f"Failed to install requirements: {e}")

        start_time = time.time()

        try:
            result = subprocess.run(
                [
                    "pytest",
                    str(test_file),
                    "-v",
                    "--tb=short",
                    "--timeout=30",
                    "-p", "no:warnings"
                ],
                cwd=tmpdir,
                capture_output=True,
                text=True,
                timeout=120
            )
        except subprocess.TimeoutExpired:
            return {
                "success": False,
                "error": "Test execution timeout (>2 minutes)",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_time": 120.0,
                "execution_mode": "local"
            }
        except FileNotFoundError:
            return {
                "success": False,
                "error": "pytest not found. Install with: pip install pytest",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "local"
            }

        execution_time = time.time() - start_time
        stdout = result.stdout

        # Count tests
        passed = stdout.count(" PASSED")
        failed = stdout.count(" FAILED")
        errors = stdout.count(" ERROR")
        test_count = passed + failed + errors

        return {
            "success": result.returncode == 0,
            "tests_run": test_count,
            "tests_passed": passed,
            "tests_failed": failed,
            "tests_errors": errors,
            "execution_time": round(execution_time, 2),
            "coverage_percent": 0.0,  # Coverage not measured in local mode
            "stdout": stdout,
            "stderr": result.stderr,
            "exit_code": result.returncode,
            "execution_mode": "local"
        }


def _run_java_tests_locally(code: str, tests: str, module_name: str) -> Dict:
    """Run Java tests locally using JUnit."""
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir_path = Path(tmpdir)

        # Extract class name from code
        class_name = module_name.replace('_', '').title()
        if 'public class ' in code:
            import re
            match = re.search(r'public class (\w+)', code)
            if match:
                class_name = match.group(1)

        # Write Java files
        code_file = tmpdir_path / f"{class_name}.java"
        test_file = tmpdir_path / f"{class_name}Test.java"

        code_file.write_text(code, encoding='utf-8')
        test_file.write_text(tests, encoding='utf-8')

        start_time = time.time()

        try:
            # Compile
            compile_result = subprocess.run(
                ["javac", str(code_file), str(test_file)],
                cwd=tmpdir,
                capture_output=True,
                text=True,
                timeout=60
            )

            if compile_result.returncode != 0:
                return {
                    "success": False,
                    "error": f"Compilation failed: {compile_result.stderr}",
                    "tests_run": 0,
                    "tests_passed": 0,
                    "tests_failed": 0,
                    "execution_mode": "local"
                }

            # Run tests (simplified - would need JUnit runner in real scenario)
            run_result = subprocess.run(
                ["java", f"{class_name}Test"],
                cwd=tmpdir,
                capture_output=True,
                text=True,
                timeout=120
            )

            execution_time = time.time() - start_time

            return {
                "success": run_result.returncode == 0,
                "tests_run": 1,  # Simplified
                "tests_passed": 1 if run_result.returncode == 0 else 0,
                "tests_failed": 0 if run_result.returncode == 0 else 1,
                "execution_time": round(execution_time, 2),
                "stdout": run_result.stdout,
                "stderr": run_result.stderr,
                "exit_code": run_result.returncode,
                "execution_mode": "local"
            }

        except FileNotFoundError:
            return {
                "success": False,
                "error": "Java compiler (javac) not found. Install JDK.",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "local"
            }
        except subprocess.TimeoutExpired:
            return {
                "success": False,
                "error": "Java test execution timeout",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "local"
            }


def _run_js_tests_locally(code: str, tests: str, module_name: str,
                          language: str) -> Dict:
    """Run JavaScript/TypeScript tests locally using Jest or Node."""
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpdir_path = Path(tmpdir)

        ext = '.ts' if language == 'typescript' else '.js'

        # Write files
        code_file = tmpdir_path / f"{module_name}{ext}"
        test_file = tmpdir_path / f"{module_name}.test{ext}"

        code_file.write_text(code, encoding='utf-8')
        test_file.write_text(tests, encoding='utf-8')

        # Create minimal package.json
        package_json = {
            "name": "test-sandbox",
            "scripts": {"test": "jest"},
            "devDependencies": {"jest": "^29.0.0"}
        }
        if language == 'typescript':
            package_json["devDependencies"]["ts-jest"] = "^29.0.0"
            package_json["devDependencies"]["typescript"] = "^5.0.0"

        (tmpdir_path / "package.json").write_text(json.dumps(package_json))

        start_time = time.time()

        try:
            # Try running with node directly for simple tests
            run_result = subprocess.run(
                ["node", str(test_file)],
                cwd=tmpdir,
                capture_output=True,
                text=True,
                timeout=60
            )

            execution_time = time.time() - start_time

            return {
                "success": run_result.returncode == 0,
                "tests_run": 1,
                "tests_passed": 1 if run_result.returncode == 0 else 0,
                "tests_failed": 0 if run_result.returncode == 0 else 1,
                "execution_time": round(execution_time, 2),
                "stdout": run_result.stdout,
                "stderr": run_result.stderr,
                "exit_code": run_result.returncode,
                "execution_mode": "local"
            }

        except FileNotFoundError:
            return {
                "success": False,
                "error": "Node.js not found. Install Node.js.",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "local"
            }
        except subprocess.TimeoutExpired:
            return {
                "success": False,
                "error": "JavaScript test execution timeout",
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "local"
            }


# Import Modal executor if available
if MODAL_AVAILABLE:
    try:
        from .modal_executor import execute_in_modal
        MODAL_EXECUTOR_AVAILABLE = True
    except Exception as e:
        logger.warning(f"Failed to import Modal executor: {e}")
        MODAL_EXECUTOR_AVAILABLE = False
else:
    MODAL_EXECUTOR_AVAILABLE = False


def run_tests_in_sandbox(code: str, tests: str, requirements: List[str],
                         module_name: str = "module", language: str = "python") -> Dict:
    """
    Execute tests in sandbox (Modal if available, otherwise local).

    Args:
        code: Source code
        tests: Test code
        requirements: Package requirements
        module_name: Module name
        language: Programming language

    Returns:
        Test execution results
    """
    if MODAL_EXECUTOR_AVAILABLE:
        try:
            return execute_in_modal(code, tests, requirements, module_name, language)
        except Exception as e:
            logger.warning(f"Modal execution failed: {e}, falling back to local")
            return run_tests_locally(code, tests, requirements, module_name, language)
    else:
        logger.info("Modal not available, running tests locally")
        return run_tests_locally(code, tests, requirements, module_name, language)


class ModalSandboxValidator:
    """
    Validates code transformations using Modal sandbox.
    Provides safe, isolated test execution environment.
    Falls back to local execution when Modal is not available.

    Supports multiple languages: Python, Java, JavaScript, TypeScript, etc.
    """

    def __init__(self, prefer_modal: bool = None):
        """
        Initialize Modal Sandbox Validator.

        Args:
            prefer_modal: If True, try Modal first, fallback to local.
                If False, always use local execution.
                If None (default), auto-detect based on environment.
        """
        # Import config to get environment-aware settings
        from .config import should_prefer_modal, validate_environment, IS_HUGGINGFACE

        # Auto-detect if not specified
        if prefer_modal is None:
            prefer_modal = should_prefer_modal()

        self.prefer_modal = prefer_modal and MODAL_AVAILABLE
        self.is_huggingface = IS_HUGGINGFACE
        self.app = app

        # Validate environment configuration
        validate_environment()

        if self.is_huggingface and not self.prefer_modal:
            logger.error("Running on Hugging Face but Modal is not available!")
            logger.error("Test execution will fail. Please configure Modal.")

        if self.prefer_modal:
            logger.info("ModalSandboxValidator initialized with Modal support")
        else:
            logger.info("ModalSandboxValidator initialized (local execution mode)")

    def validate_transformation(
        self,
        original_code: str,
        modernized_code: str,
        tests: str,
        requirements: Optional[List[str]] = None,
        file_path: Optional[str] = None
    ) -> Dict:
        """
        Validate code transformation by running tests in sandbox.

        Args:
            original_code: Original legacy code
            modernized_code: Modernized code
            tests: Generated test code
            requirements: Additional packages needed
            file_path: Path to the file (used to extract module name and language)

        Returns:
            Validation results with test metrics
        """
        logger.info("Starting sandbox validation")

        # Detect language from file path or code
        language = _detect_language(file_path, modernized_code)
        logger.info(f"Detected language: {language}")

        # Extract requirements based on language
        if requirements is None:
            requirements = self._extract_requirements(modernized_code, language)

        # Extract module name from file path
        if file_path:
            module_name = Path(file_path).stem
        else:
            module_name = "module"

        logger.info(f"Validating module: {module_name} (language: {language})")

        # Try Modal first if available and preferred
        if self.prefer_modal and MODAL_AVAILABLE:
            try:
                logger.info("Attempting Modal sandbox execution...")
                results = run_tests_in_sandbox(
                    code=modernized_code,
                    tests=tests,
                    requirements=requirements,
                    module_name=module_name,
                    language=language
                )

                results['execution_mode'] = 'modal'
                logger.info(f"Modal validation complete: {results['tests_passed']}/{results['tests_run']} passed")
                return results

            except Exception as e:
                logger.warning(f"Modal execution failed: {e}, falling back to local")

        # Fallback to local execution
        logger.info("Running tests locally...")
        try:
            results = run_tests_locally(
                code=modernized_code,
                tests=tests,
                requirements=requirements,
                module_name=module_name,
                language=language
            )

            logger.info(f"Local validation complete: {results['tests_passed']}/{results['tests_run']} passed")
            return results

        except Exception as e:
            logger.error(f"Local validation error: {e}")
            return {
                "success": False,
                "error": str(e),
                "tests_run": 0,
                "tests_passed": 0,
                "tests_failed": 0,
                "execution_mode": "failed"
            }

    def validate_batch(
        self,
        transformations: List[Dict]
    ) -> List[Dict]:
        """
        Validate multiple transformations in parallel.

        Args:
            transformations: List of transformation dicts with code and tests

        Returns:
            List of validation results
        """
        logger.info(f"Starting batch validation for {len(transformations)} files")

        results = []

        # Try Modal batch execution if available
        if self.prefer_modal and MODAL_AVAILABLE:
            try:
                # For batch operations, we can call functions directly
                # Modal handles the parallelization internally
                for t in transformations:
                    file_path = t.get('file_path', '')
                    language = _detect_language(file_path, t['modernized_code'])

                    try:
                        result = run_tests_in_sandbox(
                            code=t['modernized_code'],
                            tests=t['tests'],
                            requirements=t.get('requirements', []),
                            module_name=Path(file_path).stem if file_path else 'module',
                            language=language
                        )
                        result['file_path'] = file_path
                        result['execution_mode'] = 'modal'
                        results.append(result)
                    except Exception as e:
                        logger.error(f"Error validating {file_path}: {e}")
                        results.append({
                            "file_path": file_path,
                            "success": False,
                            "error": str(e),
                            "execution_mode": "modal_failed"
                        })

                logger.info(f"Modal batch validation complete: {len(results)} results")
                return results

            except Exception as e:
                logger.warning(f"Modal batch execution failed: {e}, falling back to local")
                results = []  # Reset for local execution

        # Fallback to local sequential execution
        for t in transformations:
            file_path = t.get('file_path', '')
            language = _detect_language(file_path, t['modernized_code'])

            try:
                result = run_tests_locally(
                    code=t['modernized_code'],
                    tests=t['tests'],
                    requirements=t.get('requirements', []),
                    module_name=Path(file_path).stem if file_path else 'module',
                    language=language
                )
                result['file_path'] = file_path
                results.append(result)
            except Exception as e:
                logger.error(f"Error validating {file_path}: {e}")
                results.append({
                    "file_path": file_path,
                    "success": False,
                    "error": str(e),
                    "execution_mode": "local_failed"
                })

        logger.info(f"Local batch validation complete: {len(results)} results")
        return results

    def _extract_requirements(self, code: str, language: str = "python") -> List[str]:
        """
        Extract required packages from import statements.

        Args:
            code: Source code
            language: Programming language

        Returns:
            List of package names
        """
        requirements = []

        if language == 'python':
            # Python import to package mappings
            import_map = {
                'sqlalchemy': 'sqlalchemy',
                'pymysql': 'pymysql',
                'requests': 'requests',
                'flask': 'flask',
                'django': 'django',
                'numpy': 'numpy',
                'pandas': 'pandas',
                'fastapi': 'fastapi',
                'pydantic': 'pydantic',
                'aiohttp': 'aiohttp',
                'httpx': 'httpx',
                'pytest': 'pytest'
            }

            for line in code.split('\n'):
                line = line.strip()
                if line.startswith('import ') or line.startswith('from '):
                    parts = line.split()
                    if len(parts) >= 2:
                        module = parts[1].split('.')[0]
                        if module in import_map:
                            pkg = import_map[module]
                            if pkg not in requirements:
                                requirements.append(pkg)

        elif language == 'java':
            # Java dependencies would be handled via Maven/Gradle
            # Return empty list - dependencies managed differently
            pass

        elif language in ('javascript', 'typescript'):
            # JavaScript/TypeScript - look for require/import statements
            import_map = {
                'express': 'express',
                'axios': 'axios',
                'lodash': 'lodash',
                'moment': 'moment',
                'react': 'react',
                'jest': 'jest'
            }

            for line in code.split('\n'):
                line = line.strip()
                for pkg in import_map:
                    if f"'{pkg}'" in line or f'"{pkg}"' in line:
                        if pkg not in requirements:
                            requirements.append(pkg)

        return requirements

    def test_behavioral_equivalence(
        self,
        original_code: str,
        modernized_code: str,
        test_cases: List[Dict]
    ) -> Dict:
        """
        Test that modernized code produces same outputs as original.

        Args:
            original_code: Original code
            modernized_code: Modernized code
            test_cases: List of test case dicts with inputs and expected outputs

        Returns:
            Equivalence test results
        """
        logger.info("Testing behavioral equivalence")

        # Generate equivalence test
        equivalence_test = self._generate_equivalence_test(test_cases)

        # Test both versions
        original_results = self.validate_transformation(
            original_code, original_code, equivalence_test
        )

        modernized_results = self.validate_transformation(
            original_code, modernized_code, equivalence_test
        )

        # Compare results
        equivalence_score = 0.0
        if original_results['success'] and modernized_results['success']:
            if original_results['tests_passed'] == modernized_results['tests_passed']:
                equivalence_score = 1.0
            else:
                equivalence_score = (
                    modernized_results['tests_passed'] /
                    max(original_results['tests_passed'], 1)
                )

        return {
            "behavioral_equivalence": equivalence_score >= 0.95,
            "equivalence_score": round(equivalence_score, 3),
            "original_results": original_results,
            "modernized_results": modernized_results
        }

    def _generate_equivalence_test(self, test_cases: List[Dict]) -> str:
        """Generate pytest code for equivalence testing."""
        test_code = "import pytest\n\n"

        for i, case in enumerate(test_cases):
            test_code += f"""
def test_equivalence_{i}():
    \"\"\"Test case {i}: {case.get('description', 'equivalence test')}\"\"\"
    # Test implementation would go here
    assert True
"""

        return test_code
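Usage sketch for the validator above (illustrative only, not part of the commit; the code and test strings are hypothetical):

# Validate a small transformation with local pytest execution; the result keys
# follow the dicts defined in run_tests_locally above.
validator = ModalSandboxValidator(prefer_modal=False)  # force local execution
report = validator.validate_transformation(
    original_code="def add(a, b): return a + b",
    modernized_code="def add(a: int, b: int) -> int:\n    return a + b\n",
    tests=(
        "from calculator import add\n"
        "def test_add():\n"
        "    assert add(1, 2) == 3\n"
    ),
    file_path="calculator.py",
)
print(report["execution_mode"], f"{report['tests_passed']}/{report['tests_run']} passed")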
src/search/__init__.py
ADDED
@@ -0,0 +1,8 @@
"""
Search module for semantic code search using LlamaIndex and Chroma.
"""

from .vector_store import CodeSearchEngine
from .embeddings import ModalEmbedding, GeminiEmbeddingWrapper, get_embedding_model

__all__ = ['CodeSearchEngine', 'ModalEmbedding', 'GeminiEmbeddingWrapper', 'get_embedding_model']
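Usage sketch for the search module (illustrative only, not part of the commit; it assumes get_embedding_model() takes no arguments and returns a LlamaIndex BaseEmbedding):

from src.search import get_embedding_model

# Embed a code snippet; get_text_embedding is the public BaseEmbedding API.
embed_model = get_embedding_model()
vector = embed_model.get_text_embedding("def add(a, b): return a + b")
print(len(vector))  # embedding dimensionality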
src/search/embeddings.py
ADDED
@@ -0,0 +1,350 @@
| 1 |
+
"""
|
| 2 |
+
Custom embedding implementations for Modal and Gemini.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import logging
|
| 7 |
+
from typing import List, Optional
|
| 8 |
+
from llama_index.core.embeddings import BaseEmbedding
|
| 9 |
+
from llama_index.core.bridge.pydantic import PrivateAttr
|
| 10 |
+
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
# Global tokenizer instance (lazy loaded)
|
| 14 |
+
_tokenizer = None
|
| 15 |
+
|
| 16 |
+
def get_tokenizer():
|
| 17 |
+
"""Get or create the tokenizer for BAAI/bge-base-en-v1.5."""
|
| 18 |
+
global _tokenizer
|
| 19 |
+
if _tokenizer is None:
|
| 20 |
+
try:
|
| 21 |
+
from transformers import AutoTokenizer
|
| 22 |
+
_tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-base-en-v1.5")
|
| 23 |
+
logger.info("Tokenizer loaded successfully")
|
| 24 |
+
except Exception as e:
|
| 25 |
+
logger.warning(f"Failed to load tokenizer: {e}. Falling back to word-based truncation.")
|
| 26 |
+
_tokenizer = False # Mark as failed
|
| 27 |
+
return _tokenizer if _tokenizer else None
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class ModalEmbedding(BaseEmbedding):
|
| 31 |
+
"""
|
| 32 |
+
Custom embedding class that uses Modal's deployed TEI service.
|
| 33 |
+
Primary embedding model for the application.
|
| 34 |
+
"""
|
| 35 |
+
|
| 36 |
+
_modal_instance: Optional[object] = PrivateAttr(default=None)
|
| 37 |
+
_model_name: str = PrivateAttr(default="BAAI/bge-base-en-v1.5")
|
| 38 |
+
_max_text_length: int = PrivateAttr(default=4000) # Reduced max chars per text
|
| 39 |
+
_batch_size: int = PrivateAttr(default=2) # Very small batches to avoid 413
|
| 40 |
+
|
| 41 |
+
def __init__(self, **kwargs):
|
| 42 |
+
"""Initialize Modal embedding client."""
|
| 43 |
+
super().__init__(**kwargs)
|
| 44 |
+
try:
|
| 45 |
+
import modal
|
| 46 |
+
# Use modal.Cls.from_name and get an instance
|
| 47 |
+
TextEmbeddingsInference = modal.Cls.from_name(
|
| 48 |
+
"text-embeddings-inference-api",
|
| 49 |
+
"TextEmbeddingsInference"
|
| 50 |
+
)
|
| 51 |
+
# Create an instance and store it
|
| 52 |
+
self._modal_instance = TextEmbeddingsInference()
|
| 53 |
+
logger.info("ModalEmbedding initialized successfully")
|
| 54 |
+
except Exception as e:
|
| 55 |
+
logger.error(f"Failed to initialize Modal embedding: {e}")
|
| 56 |
+
raise
|
| 57 |
+
|
| 58 |
+
def _truncate_text(self, text: str) -> str:
|
| 59 |
+
"""Truncate text to max token limit using proper tokenization."""
|
| 60 |
+
# Modal TEI has a hard limit of 512 tokens
|
| 61 |
+
# Use 500 tokens to be safe (leave some buffer)
|
| 62 |
+
max_tokens = 500
|
| 63 |
+
|
| 64 |
+
tokenizer = get_tokenizer()
|
| 65 |
+
|
| 66 |
+
if tokenizer:
|
| 67 |
+
# Use proper tokenization
|
| 68 |
+
try:
|
| 69 |
+
tokens = tokenizer.encode(text, add_special_tokens=False)
|
| 70 |
+
if len(tokens) > max_tokens:
|
| 71 |
+
# Truncate to max_tokens
|
| 72 |
+
truncated_tokens = tokens[:max_tokens]
|
| 73 |
+
# Decode back to text
|
| 74 |
+
return tokenizer.decode(truncated_tokens, skip_special_tokens=True)
|
| 75 |
+
return text
|
| 76 |
+
except Exception as e:
|
| 77 |
+
logger.warning(f"Tokenization failed: {e}. Using word-based fallback.")
|
| 78 |
+
|
| 79 |
+
# Fallback: word-based truncation (conservative estimate)
|
| 80 |
+
# Assume 1.3 tokens per word: 500 tokens ≈ 385 words
|
| 81 |
+
# Use 250 words to be very conservative
|
| 82 |
+
words = text.split()
|
| 83 |
+
if len(words) > 250:
|
| 84 |
+
truncated_words = words[:250]
|
| 85 |
+
return ' '.join(truncated_words)
|
| 86 |
+
return text
|
| 87 |
+
|
| 88 |
+
@classmethod
|
| 89 |
+
def class_name(cls) -> str:
|
| 90 |
+
return "ModalEmbedding"
|
| 91 |
+
|
| 92 |
+
async def _aget_query_embedding(self, query: str) -> List[float]:
|
| 93 |
+
"""Get query embedding asynchronously."""
|
| 94 |
+
return await self._aget_text_embedding(query)
|
| 95 |
+
|
| 96 |
+
async def _aget_text_embedding(self, text: str) -> List[float]:
|
| 97 |
+
"""Get text embedding asynchronously."""
|
| 98 |
+
try:
|
| 99 |
+
text = self._truncate_text(text)
|
| 100 |
+
embeddings = await self._modal_instance.embed.remote.aio([text])
|
| 101 |
+
return embeddings[0]
|
| 102 |
+
except Exception as e:
|
| 103 |
+
logger.error(f"Error getting embedding from Modal: {e}")
|
| 104 |
+
raise
|
| 105 |
+
|
| 106 |
+
def _get_query_embedding(self, query: str) -> List[float]:
|
| 107 |
+
"""Get query embedding synchronously."""
|
| 108 |
+
return self._get_text_embedding(query)
|
| 109 |
+
|
| 110 |
+
def _get_text_embedding(self, text: str) -> List[float]:
|
| 111 |
+
"""Get text embedding synchronously."""
|
| 112 |
+
try:
|
| 113 |
+
text = self._truncate_text(text)
|
| 114 |
+
embeddings = self._modal_instance.embed.remote([text])
|
| 115 |
+
return embeddings[0]
|
| 116 |
+
except Exception as e:
|
| 117 |
+
logger.error(f"Error getting embedding from Modal: {e}")
|
| 118 |
+
# If Modal fails due to size limits, try to fall back to Gemini for this request
|
| 119 |
+
if "413" in str(e) or "Payload Too Large" in str(e) or "Input validation error" in str(e):
|
| 120 |
+
logger.warning("Modal embedding failed due to size limits, attempting Gemini fallback for this request")
|
| 121 |
+
try:
|
| 122 |
+
gemini_wrapper = GeminiEmbeddingWrapper()
|
| 123 |
+
return gemini_wrapper._get_text_embedding(text)
|
| 124 |
+
except Exception as gemini_e:
|
| 125 |
+
logger.error(f"Gemini fallback also failed: {gemini_e}")
|
| 126 |
+
raise e
|
| 127 |
+
raise
|
| 128 |
+
|
| 129 |
+
def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
|
| 130 |
+
"""Get embeddings for multiple texts with batching."""
|
| 131 |
+
# Truncate all texts
|
| 132 |
+
texts = [self._truncate_text(t) for t in texts]
|
| 133 |
+
|
| 134 |
+
# Process in smaller batches to avoid payload size issues
|
| 135 |
+
all_embeddings = []
|
| 136 |
+
for i in range(0, len(texts), self._batch_size):
|
| 137 |
+
batch = texts[i:i + self._batch_size]
|
| 138 |
+
try:
|
| 139 |
+
batch_embeddings = self._modal_instance.embed.remote(batch)
|
| 140 |
+
all_embeddings.extend(batch_embeddings)
|
| 141 |
+
except Exception as e:
|
| 142 |
+
logger.error(f"Error getting embeddings from Modal for batch {i//self._batch_size + 1}: {e}")
|
| 143 |
+
raise
|
| 144 |
+
|
| 145 |
+
return all_embeddings
|
| 146 |
+
|
| 147 |
+
async def _aget_text_embeddings(self, texts: List[str]) -> List[List[float]]:
|
| 148 |
+
"""Get embeddings for multiple texts asynchronously with batching."""
|
| 149 |
+
# Truncate all texts
|
| 150 |
+
texts = [self._truncate_text(t) for t in texts]
|
| 151 |
+
|
| 152 |
+
# Process in smaller batches to avoid payload size issues
|
| 153 |
+
all_embeddings = []
|
| 154 |
+
for i in range(0, len(texts), self._batch_size):
|
| 155 |
+
batch = texts[i:i + self._batch_size]
|
| 156 |
+
try:
|
| 157 |
+
batch_embeddings = await self._modal_instance.embed.remote.aio(batch)
|
| 158 |
+
all_embeddings.extend(batch_embeddings)
|
| 159 |
+
except Exception as e:
|
| 160 |
+
logger.error(f"Error getting embeddings from Modal for batch {i//self._batch_size + 1}: {e}")
|
| 161 |
+
raise
|
| 162 |
+
|
| 163 |
+
|
class NebiusEmbeddingWrapper(BaseEmbedding):
    """
    Wrapper for Nebius embeddings using OpenAI-compatible API.
    Uses Qwen/Qwen3-Embedding-8B model (4096 dimensions).
    """

    _client: Optional[object] = PrivateAttr(default=None)
    _model_name: str = PrivateAttr(default="Qwen/Qwen3-Embedding-8B")

    def __init__(self, api_key: Optional[str] = None, model_name: str = "Qwen/Qwen3-Embedding-8B", **kwargs):
        """Initialize Nebius embedding client."""
        super().__init__(**kwargs)

        # Get API key from environment if not provided
        if not api_key:
            api_key = os.getenv("NEBIUS_API_KEY")

        if not api_key:
            raise ValueError("NEBIUS_API_KEY not found")

        try:
            from openai import OpenAI
            self._client = OpenAI(
                base_url="https://api.tokenfactory.nebius.com/v1/",
                api_key=api_key
            )
            self._model_name = model_name
            logger.info(f"NebiusEmbeddingWrapper initialized with model: {model_name}")
        except Exception as e:
            logger.error(f"Failed to initialize Nebius embedding: {e}")
            raise

    @classmethod
    def class_name(cls) -> str:
        return "NebiusEmbeddingWrapper"

    def _get_query_embedding(self, query: str) -> List[float]:
        """Get query embedding."""
        return self._get_text_embedding(query)

    def _get_text_embedding(self, text: str) -> List[float]:
        """Get text embedding."""
        try:
            response = self._client.embeddings.create(
                model=self._model_name,
                input=text
            )
            return response.data[0].embedding
        except Exception as e:
            logger.error(f"Error getting embedding from Nebius: {e}")
            raise

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Get embeddings for multiple texts."""
        try:
            response = self._client.embeddings.create(
                model=self._model_name,
                input=texts
            )
            # Sort by index to ensure correct order
            sorted_data = sorted(response.data, key=lambda x: x.index)
            return [item.embedding for item in sorted_data]
        except Exception as e:
            logger.error(f"Error getting batch embeddings from Nebius: {e}")
            raise

    async def _aget_query_embedding(self, query: str) -> List[float]:
        """Get query embedding asynchronously."""
        return self._get_query_embedding(query)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        """Get text embedding asynchronously."""
        return self._get_text_embedding(text)


class GeminiEmbeddingWrapper(BaseEmbedding):
    """
    Wrapper for Gemini embeddings using the new google-genai SDK.
    Fallback embedding model.
    """

    _client: Optional[object] = PrivateAttr(default=None)
    _model_name: str = PrivateAttr(default="models/gemini-embedding-001")

    def __init__(self, api_key: Optional[str] = None, model_name: str = "models/gemini-embedding-001", **kwargs):
        """Initialize Gemini embedding client."""
        super().__init__(**kwargs)

        # Use centralized config if no API key provided
        if not api_key:
            try:
                from src.config import GeminiConfig
                api_key = GeminiConfig.get_api_key()
            except Exception:
                # Fallback to environment variable
                api_key = os.getenv("GEMINI_API_KEY")

        if not api_key:
            raise ValueError("GEMINI_API_KEY not found")

        try:
            from google import genai
            self._client = genai.Client(api_key=api_key)
            self._model_name = model_name
            logger.info(f"GeminiEmbeddingWrapper initialized with model: {model_name}")
        except Exception as e:
            logger.error(f"Failed to initialize Gemini embedding: {e}")
            raise

    @classmethod
    def class_name(cls) -> str:
        return "GeminiEmbeddingWrapper"

    def _get_query_embedding(self, query: str) -> List[float]:
        """Get query embedding."""
        return self._get_text_embedding(query)

    def _get_text_embedding(self, text: str) -> List[float]:
        """Get text embedding."""
        try:
            result = self._client.models.embed_content(
                model=self._model_name,
                contents=text
            )
            return result.embeddings[0].values
        except Exception as e:
            logger.error(f"Error getting embedding from Gemini: {e}")
            raise

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Get embeddings for multiple texts."""
        embeddings = []
        for text in texts:
            embeddings.append(self._get_text_embedding(text))
        return embeddings

    async def _aget_query_embedding(self, query: str) -> List[float]:
        """Get query embedding asynchronously."""
        return self._get_query_embedding(query)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        """Get text embedding asynchronously."""
        return self._get_text_embedding(text)


def get_embedding_model(prefer_modal: bool = True, force_gemini: bool = False) -> BaseEmbedding:
    """
    Get the best available embedding model.

    Priority order:
    1. Modal (if prefer_modal=True and available)
    2. Provider-specific embedding (Nebius if AI_PROVIDER=nebius, Gemini otherwise)

    Args:
        prefer_modal: If True, try Modal first, then fall back to provider-specific
        force_gemini: If True, skip Modal and use Gemini directly

    Returns:
        BaseEmbedding instance
    """
    if force_gemini:
        logger.info("Using Gemini embedding (forced)")
        return GeminiEmbeddingWrapper()

    if prefer_modal:
        try:
            logger.info("Attempting to use Modal embedding (primary)")
            return ModalEmbedding()
        except Exception as e:
            logger.warning(f"Modal embedding unavailable, falling back to provider-specific: {e}")

    # Determine which provider-specific embedding to use
    ai_provider = os.getenv("AI_PROVIDER", "gemini").lower()

    if ai_provider == "nebius":
        try:
            logger.info("Using Nebius embedding (Qwen/Qwen3-Embedding-8B)")
            return NebiusEmbeddingWrapper()
        except Exception as e:
            logger.warning(f"Nebius embedding unavailable, falling back to Gemini: {e}")

    try:
        logger.info("Using Gemini embedding (fallback)")
        return GeminiEmbeddingWrapper()
    except Exception as e:
        logger.error(f"Failed to initialize any embedding model: {e}")
        raise
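Taken together, `get_embedding_model()` is the single entry point to the Modal → Nebius/Gemini fallback chain. A minimal usage sketch, assuming the API keys from `.env.example` are configured; the sample string is illustrative:

# Hypothetical usage sketch; assumes the relevant API keys are set.
from src.search.embeddings import get_embedding_model

embed_model = get_embedding_model(prefer_modal=True)
vector = embed_model.get_text_embedding("def legacy_function(): pass")
print(len(vector))  # dimensionality depends on which backend was selected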
src/search/vector_store.py
ADDED
|
@@ -0,0 +1,350 @@
"""
Vector Store implementation using LlamaIndex and Chroma for semantic code search.
"""

import os
import logging
from typing import List, Dict, Optional
from pathlib import Path

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, Document
from llama_index.vector_stores.chroma import ChromaVectorStore
import chromadb
import warnings

from .embeddings import get_embedding_model
from src.config import AIManager

# Suppress deprecation warnings
warnings.filterwarnings('ignore', category=DeprecationWarning, module='llama_index.llms.gemini')
warnings.filterwarnings('ignore', category=DeprecationWarning, module='llama_index.embeddings.gemini')

logger = logging.getLogger(__name__)


class CodeSearchEngine:
    """
    Semantic code search engine using LlamaIndex + Chroma vector store.
    Enables finding similar legacy patterns across large codebases.
    """

    def __init__(self, persist_dir: Optional[str] = None, use_modal: bool = True):
        """
        Initialize the code search engine.

        Args:
            persist_dir: Optional directory to persist Chroma database
            use_modal: If True, use Modal embedding as primary (default: True)
        """
        self.persist_dir = persist_dir
        self.index: Optional[VectorStoreIndex] = None
        self.chroma_client = None
        self.chroma_collection = None
        self.use_modal = use_modal

        # Configure embeddings (Modal primary, Gemini fallback)
        try:
            Settings.embed_model = get_embedding_model(prefer_modal=use_modal)
        except Exception as e:
            logger.warning(f"Failed to initialize preferred embedding, using Gemini: {e}")
            Settings.embed_model = get_embedding_model(force_gemini=True)
            self.use_modal = False

        # Configure LLM using centralized AIManager
        self.ai_manager = AIManager()

        # Set up LlamaIndex LLM based on provider
        if self.ai_manager.provider_name == "gemini":
            from llama_index.llms.gemini import Gemini
            Settings.llm = Gemini(
                model=self.ai_manager.model_name,
                api_key=os.getenv("GEMINI_API_KEY"),
                temperature=0.1
            )
        elif self.ai_manager.provider_name in ["nebius", "openai"]:
            from llama_index.llms.openai import OpenAI
            if self.ai_manager.provider_name == "nebius":
                # Use gpt-3.5-turbo as placeholder to pass LlamaIndex validation
                # The actual model is passed via additional_kwargs
                Settings.llm = OpenAI(
                    model="gpt-3.5-turbo",
                    api_key=os.getenv("NEBIUS_API_KEY"),
                    api_base="https://api.tokenfactory.nebius.com/v1/",
                    temperature=0.1,
                    additional_kwargs={"model": self.ai_manager.model_name}
                )
            else:
                Settings.llm = OpenAI(
                    model=self.ai_manager.model_name,
                    api_key=os.getenv("OPENAI_API_KEY"),
                    temperature=0.1
                )

        embedding_type = "Modal (primary)" if self.use_modal else "Gemini (fallback)"
        logger.info(f"CodeSearchEngine initialized with {embedding_type} embeddings and {self.ai_manager.provider_name} LLM")

    def build_index(self, repo_path: str, file_extensions: Optional[List[str]] = None) -> VectorStoreIndex:
        """
        Build searchable index of codebase.

        Args:
            repo_path: Path to repository to index
            file_extensions: Optional list of file extensions to include (e.g., ['.py', '.java'])

        Returns:
            VectorStoreIndex for querying, or None if no code files were found
        """
        logger.info(f"Building code index for: {repo_path}")

        # Initialize Chroma client
        if self.persist_dir:
            self.chroma_client = chromadb.PersistentClient(path=self.persist_dir)
        else:
            self.chroma_client = chromadb.EphemeralClient()

        # Create or get collection
        collection_name = "code_embeddings"
        try:
            self.chroma_collection = self.chroma_client.get_or_create_collection(collection_name)
        except Exception as e:
            logger.warning(f"Error with collection, creating new: {e}")
            self.chroma_collection = self.chroma_client.create_collection(collection_name)

        vector_store = ChromaVectorStore(chroma_collection=self.chroma_collection)

        # Load documents from repository
        documents = self._load_code_files(repo_path, file_extensions)

        if not documents:
            logger.warning(f"No code files found in {repo_path}")
            return None

        logger.info(f"Loaded {len(documents)} code files")

        # Build index (using default text splitter instead of CodeSplitter to avoid tree-sitter dependency)
        try:
            self.index = VectorStoreIndex.from_documents(
                documents,
                vector_store=vector_store,
                show_progress=True
            )
            logger.info("Code index built successfully")
        except Exception as e:
            if self.use_modal:
                logger.warning(f"Modal embedding failed during indexing: {e}")
                logger.info("Retrying with Gemini embeddings...")

                # Switch to Gemini
                Settings.embed_model = get_embedding_model(force_gemini=True)
                self.use_modal = False

                # Retry building index
                self.index = VectorStoreIndex.from_documents(
                    documents,
                    vector_store=vector_store,
                    show_progress=True
                )
                logger.info("Code index built successfully with Gemini embeddings")
            else:
                raise

        return self.index

    def _load_code_files(self, repo_path: str, file_extensions: Optional[List[str]] = None) -> List[Document]:
        """
        Load code files from repository.

        Args:
            repo_path: Path to repository
            file_extensions: Optional list of extensions to include

        Returns:
            List of Document objects
        """
        documents = []
        repo_path = Path(repo_path)

        # Default extensions if not specified
        if file_extensions is None:
            file_extensions = [
                # Python
                '.py', '.pyw', '.pyx',
                # Java
                '.java',
                # JavaScript/TypeScript
                '.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs',
                # PHP
                '.php', '.php3', '.php4', '.php5', '.phtml',
                # Ruby
                '.rb', '.rbw',
                # Go
                '.go',
                # C/C++
                '.c', '.cpp', '.cc', '.cxx', '.c++', '.h', '.hpp', '.hh', '.hxx', '.h++',
                # C#
                '.cs',
                # Rust
                '.rs',
                # Kotlin
                '.kt', '.kts',
                # Swift
                '.swift',
                # Scala
                '.scala', '.sc',
                # R
                '.r', '.R',
                # Perl
                '.pl', '.pm', '.t', '.pod',
                # Shell
                '.sh', '.bash', '.zsh', '.fish'
            ]

        # Walk through directory
        for file_path in repo_path.rglob('*'):
            if file_path.is_file() and file_path.suffix in file_extensions:
                try:
                    # Skip hidden files and common non-code directories
                    if any(part.startswith('.') for part in file_path.parts):
                        continue
                    if any(part in ['node_modules', 'venv', '__pycache__', 'build', 'dist']
                           for part in file_path.parts):
                        continue

                    # Read file content
                    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                        content = f.read()

                    # Create document with metadata
                    doc = Document(
                        text=content,
                        metadata={
                            'file_path': str(file_path.relative_to(repo_path)),
                            'file_name': file_path.name,
                            'extension': file_path.suffix,
                            'size': len(content)
                        }
                    )
                    documents.append(doc)

                except Exception as e:
                    logger.warning(f"Error reading {file_path}: {e}")

        return documents

    def find_similar_patterns(self, pattern_query: str, top_k: int = 20) -> List[Dict]:
        """
        Find files with similar legacy patterns.

        Args:
            pattern_query: Natural language query describing the pattern
            top_k: Number of results to return

        Returns:
            List of dictionaries with file paths and relevance scores
        """
        if not self.index:
            raise ValueError("Index not built. Call build_index() first.")

        logger.info(f"Searching for pattern: {pattern_query}")

        # Create query engine
        query_engine = self.index.as_query_engine(
            similarity_top_k=top_k,
            response_mode="tree_summarize"
        )

        # Execute query
        response = query_engine.query(pattern_query)

        # Extract source files and scores
        results = []
        for node in response.source_nodes:
            results.append({
                'file_path': node.metadata.get('file_path', 'unknown'),
                'file_name': node.metadata.get('file_name', 'unknown'),
                'score': node.score,
                'text_snippet': node.text[:200] + '...' if len(node.text) > 200 else node.text
            })

        logger.info(f"Found {len(results)} matching files")
        return results

    def analyze_pattern_with_context(self, pattern_query: str, files: List[str]) -> str:
        """
        Deep analysis of legacy pattern with full context retrieval.

        Args:
            pattern_query: Description of the pattern to analyze
            files: List of file paths to analyze

        Returns:
            Analysis result from the configured LLM
        """
        if not self.index:
            raise ValueError("Index not built. Call build_index() first.")

        logger.info(f"Analyzing pattern with context: {pattern_query}")

        # Build enhanced query with file context
        enhanced_query = f"""
        Analyze the following legacy code pattern and provide:
        1. What the code currently does
        2. Why it's problematic (security, performance, maintainability)
        3. Modern equivalent (recommended library/pattern)
        4. Migration steps with risk assessment

        Pattern to analyze: {pattern_query}
        Files to focus on: {', '.join(files)}

        Provide detailed analysis in JSON format with keys:
        - analysis: Overall analysis
        - issues: List of specific issues
        - recommendation: Recommended modern approach
        - steps: Migration steps
        - risks: Risk assessment
        """

        # Create query engine with custom prompt
        query_engine = self.index.as_query_engine(
            similarity_top_k=10,
            response_mode="compact"
        )

        # Execute analysis
        response = query_engine.query(enhanced_query)

        return response.response

    def get_transformation_examples(self, pattern_type: str, top_k: int = 5) -> List[Dict]:
        """
        Retrieve examples of successful transformations for a pattern type.

        Args:
            pattern_type: Type of pattern (e.g., "MySQLdb to SQLAlchemy")
            top_k: Number of examples to retrieve

        Returns:
            List of example transformations
        """
        if not self.index:
            raise ValueError("Index not built. Call build_index() first.")

        query = f"Find examples of code that was successfully transformed from {pattern_type}"

        query_engine = self.index.as_query_engine(
            similarity_top_k=top_k,
            response_mode="compact"
        )

        response = query_engine.query(query)

        # Extract examples from source nodes
        examples = []
        for node in response.source_nodes:
            examples.append({
                'file_path': node.metadata.get('file_path', 'unknown'),
                'code_snippet': node.text,
                'score': node.score
            })

        return examples
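End to end, the engine indexes a repository once and then serves natural-language pattern queries against it. A minimal usage sketch, assuming a local checkout and configured API keys; the path, persist directory, and query text are illustrative:

# Hypothetical usage sketch; path and query are illustrative.
from src.search.vector_store import CodeSearchEngine

engine = CodeSearchEngine(persist_dir=".chroma", use_modal=True)
engine.build_index("/path/to/legacy-repo", file_extensions=['.py'])

matches = engine.find_similar_patterns(
    "raw MySQLdb queries built with string formatting", top_k=5
)
for m in matches:
    print(f"{m['score']:.3f}  {m['file_path']}")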
src/ui/__init__.py
ADDED
|
@@ -0,0 +1 @@
"""UI components for the Legacy Code Modernizer Agent."""
src/ui/app.py
ADDED
|
@@ -0,0 +1,1045 @@
"""Gradio UI for Legacy Code Modernizer Agent - Phase 5 Complete."""

import gradio as gr
import os
import sys
import asyncio
import logging
import zipfile
import tempfile
from dotenv import load_dotenv
from pathlib import Path

# Import orchestrator
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from src.workflow.orchestrator import ModernizationOrchestrator

# Load environment variables
load_dotenv()

# Configure logging with sensitive data redaction
class SensitiveDataFilter(logging.Filter):
    """Filter to redact sensitive information from logs."""

    def __init__(self):
        super().__init__()
        self.sensitive_patterns = []

        # Collect sensitive values from environment
        sensitive_keys = [
            "GEMINI_API_KEY",
            "NEBIUS_API_KEY",
            "OPENAI_API_KEY",
            "MODAL_TOKEN_ID",
            "MODAL_TOKEN_SECRET",
            "GITHUB_TOKEN"
        ]

        for key in sensitive_keys:
            value = os.getenv(key)
            if value and len(value) > 5:  # Only redact if value is substantial
                self.sensitive_patterns.append(value)

    def filter(self, record):
        msg = str(record.msg)
        for sensitive_value in self.sensitive_patterns:
            if sensitive_value in msg:
                msg = msg.replace(sensitive_value, "[REDACTED]")
        record.msg = msg
        return True

# Initialize logging with redaction
logging.basicConfig(level=logging.INFO)
root_logger = logging.getLogger()
root_logger.addFilter(SensitiveDataFilter())
logger = logging.getLogger(__name__)
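The filter rewrites each log record in place before it reaches any handler, so a key captured at construction time never hits stdout. A quick behavioral sketch; the token value below is fake and set only for illustration:

# Illustration only; the key value is fake, and the env var must be set
# before the filter is constructed (it snapshots values in __init__).
import logging, os

os.environ["GITHUB_TOKEN"] = "ghp_fake1234567890"
logging.basicConfig(level=logging.INFO)
logging.getLogger().addFilter(SensitiveDataFilter())
logging.info("Cloning with token ghp_fake1234567890")
# -> INFO:root:Cloning with token [REDACTED]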
# Initialize orchestrator with intelligent pattern matching
orchestrator = ModernizationOrchestrator(use_intelligent_matcher=True)


# Supported file extensions for single file upload
SUPPORTED_EXTENSIONS = {
    # Python
    '.py', '.pyw', '.pyx',
    # Java
    '.java',
    # JavaScript/TypeScript
    '.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs'
}

# Language to file extension mapping
LANGUAGE_EXTENSIONS = {
    'python': ['.py', '.pyw', '.pyx'],
    'java': ['.java'],
    'javascript': ['.js', '.jsx', '.mjs', '.cjs'],
    'typescript': ['.ts', '.tsx']
}

# Target version options for each language (Updated November 2025)
TARGET_VERSIONS = {
    'python': ['Python 3.14', 'Python 3.13', 'Python 3.12', 'Python 3.11', 'Python 3.10'],
    'java': ['Java 25 LTS', 'Java 23', 'Java 21 LTS', 'Java 17 LTS'],
    'javascript': ['ES2025', 'ES2024', 'Node.js 25', 'Node.js 24 LTS', 'Node.js 22 LTS'],
    'typescript': ['TypeScript 5.9', 'TypeScript 5.8', 'TypeScript 5.7', 'TypeScript 5.6']
}

# Framework-specific versions (Updated November 2025)
FRAMEWORK_VERSIONS = [
    'React 19', 'React 18', 'React 18 (Hooks)', 'React 17',
    'Angular 21', 'Angular 20', 'Angular 19',
    'Vue 3.5', 'Vue 3.4', 'Vue 2.7',
    'Django 5.2 LTS', 'Django 5.1', 'Django 5.0',
    'Flask 3.1', 'Flask 3.0', 'Flask 2.3',
    'Spring Boot 4.0', 'Spring Boot 3.4', 'Spring Boot 3.3',
    'Laravel 12', 'Laravel 11',
    'Rails 8.1', 'Rails 8.0', 'Rails 7.2',
    'Express 5.1', 'Express 5.0', 'Express 4.21',
    'FastAPI 0.122', 'FastAPI 0.115',
    'Next.js 16', 'Next.js 15', 'Next.js 14'
]


def detect_language_from_extension(file_ext):
    """Detect language from file extension."""
    for lang, exts in LANGUAGE_EXTENSIONS.items():
        if file_ext in exts:
            return lang
    return None


def get_target_versions_for_language(language):
    """Get appropriate target versions for a detected language."""
    if not language:
        # Return all options if language unknown
        all_versions = []
        for versions in TARGET_VERSIONS.values():
            all_versions.extend(versions)
        all_versions.extend(FRAMEWORK_VERSIONS)
        return sorted(set(all_versions))

    # Copy the language-specific versions so extending below never mutates TARGET_VERSIONS
    versions = list(TARGET_VERSIONS.get(language, []))

    # Add framework versions for web languages
    if language in ['javascript', 'typescript']:
        versions.extend([v for v in FRAMEWORK_VERSIONS if 'React' in v or 'Angular' in v or 'Vue' in v or 'Express' in v])
    elif language == 'python':
        versions.extend([v for v in FRAMEWORK_VERSIONS if 'Django' in v or 'Flask' in v or 'FastAPI' in v])
    elif language == 'java':
        versions.extend([v for v in FRAMEWORK_VERSIONS if 'Spring' in v])
    elif language == 'php':
        versions.extend([v for v in FRAMEWORK_VERSIONS if 'Laravel' in v])
    elif language == 'ruby':
        versions.extend([v for v in FRAMEWORK_VERSIONS if 'Rails' in v])

    return versions if versions else get_target_versions_for_language(None)


def detect_languages_from_files(file_paths):
    """
    Detect languages from multiple files.

    Args:
        file_paths: List of file paths

    Returns:
        Tuple of (primary language, suggested target versions)
    """
    language_counts = {}

    for file_path in file_paths:
        ext = Path(file_path).suffix.lower()
        lang = detect_language_from_extension(ext)
        if lang:
            language_counts[lang] = language_counts.get(lang, 0) + 1

    if not language_counts:
        return None, []

    # Get primary language (most files)
    primary_language = max(language_counts.items(), key=lambda x: x[1])[0]

    # Get suggested versions
    suggested_versions = get_target_versions_for_language(primary_language)

    return primary_language, suggested_versions
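Detection is purely extension-based: the most frequent language across the upload wins, and its version list seeds the target dropdown. A small illustration; the file names are made up:

# Illustration of the extension-based detection above; names are made up.
files = ["app.py", "utils.py", "legacy/Main.java"]
primary, versions = detect_languages_from_files(files)
print(primary)       # 'python' (2 of 3 files)
print(versions[:2])  # e.g. ['Python 3.14', 'Python 3.13']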
+
def validate_single_file(file_path):
|
| 169 |
+
"""
|
| 170 |
+
Validate if a single file is supported for modernization.
|
| 171 |
+
|
| 172 |
+
Args:
|
| 173 |
+
file_path: Path to the uploaded file
|
| 174 |
+
|
| 175 |
+
Returns:
|
| 176 |
+
Tuple of (is_valid, message, file_info, suggested_versions)
|
| 177 |
+
"""
|
| 178 |
+
if not file_path:
|
| 179 |
+
return False, "❌ No file uploaded", None, []
|
| 180 |
+
|
| 181 |
+
try:
|
| 182 |
+
file_name = Path(file_path).name
|
| 183 |
+
file_ext = Path(file_path).suffix.lower()
|
| 184 |
+
file_size = os.path.getsize(file_path)
|
| 185 |
+
|
| 186 |
+
# Check file extension
|
| 187 |
+
if file_ext not in SUPPORTED_EXTENSIONS:
|
| 188 |
+
supported_list = ', '.join(sorted(SUPPORTED_EXTENSIONS))
|
| 189 |
+
return False, f"❌ Unsupported file type: {file_ext}\n\n✅ Supported types:\n{supported_list}", None, []
|
| 190 |
+
|
| 191 |
+
# Check file size (max 10MB for single file)
|
| 192 |
+
max_size = 10 * 1024 * 1024 # 10MB
|
| 193 |
+
if file_size > max_size:
|
| 194 |
+
return False, f"❌ File too large: {file_size / 1024 / 1024:.2f} MB (max 10 MB)", None, []
|
| 195 |
+
|
| 196 |
+
# Read file to check if it's valid text
|
| 197 |
+
try:
|
| 198 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
| 199 |
+
content = f.read(1000) # Read first 1000 chars
|
| 200 |
+
line_count = len(content.split('\n'))
|
| 201 |
+
except UnicodeDecodeError:
|
| 202 |
+
return False, f"❌ File is not a valid text file (encoding error)", None, []
|
| 203 |
+
|
| 204 |
+
# Detect language and get suggested versions
|
| 205 |
+
language = detect_language_from_extension(file_ext)
|
| 206 |
+
suggested_versions = get_target_versions_for_language(language)
|
| 207 |
+
|
| 208 |
+
# Language name mapping
|
| 209 |
+
language_names = {
|
| 210 |
+
'python': 'Python',
|
| 211 |
+
'java': 'Java',
|
| 212 |
+
'javascript': 'JavaScript',
|
| 213 |
+
'typescript': 'TypeScript'
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
file_info = {
|
| 217 |
+
'name': file_name,
|
| 218 |
+
'extension': file_ext,
|
| 219 |
+
'size': file_size,
|
| 220 |
+
'path': file_path,
|
| 221 |
+
'language': language
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
lang_display = language_names.get(language, 'Unknown')
|
| 225 |
+
|
| 226 |
+
message = f"""✅ File validated successfully!
|
| 227 |
+
|
| 228 |
+
📄 File: {file_name}
|
| 229 |
+
📊 Type: {file_ext} ({lang_display})
|
| 230 |
+
💾 Size: {file_size / 1024:.2f} KB
|
| 231 |
+
|
| 232 |
+
🎯 Suggested target versions updated in dropdown
|
| 233 |
+
|
| 234 |
+
✨ Ready to modernize! Click 'Start Modernization' button."""
|
| 235 |
+
|
| 236 |
+
return True, message, file_info, suggested_versions
|
| 237 |
+
|
| 238 |
+
except Exception as e:
|
| 239 |
+
return False, f"❌ Error validating file: {str(e)}", None, []
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
def process_single_file(file_path):
|
| 243 |
+
"""
|
| 244 |
+
Process single file upload by creating a temporary ZIP.
|
| 245 |
+
|
| 246 |
+
Args:
|
| 247 |
+
file_path: Path to the uploaded file
|
| 248 |
+
|
| 249 |
+
Returns:
|
| 250 |
+
Tuple of (status message, zip path, suggested_versions)
|
| 251 |
+
"""
|
| 252 |
+
is_valid, message, file_info, suggested_versions = validate_single_file(file_path)
|
| 253 |
+
|
| 254 |
+
if not is_valid:
|
| 255 |
+
return message, None, []
|
| 256 |
+
|
| 257 |
+
try:
|
| 258 |
+
# Create a temporary ZIP containing the single file
|
| 259 |
+
import tempfile
|
| 260 |
+
import zipfile
|
| 261 |
+
|
| 262 |
+
zip_path = tempfile.NamedTemporaryFile(delete=False, suffix='.zip')
|
| 263 |
+
with zipfile.ZipFile(zip_path.name, 'w', zipfile.ZIP_DEFLATED) as zipf:
|
| 264 |
+
zipf.write(file_path, file_info['name'])
|
| 265 |
+
|
| 266 |
+
return message, zip_path.name, suggested_versions
|
| 267 |
+
|
| 268 |
+
except Exception as e:
|
| 269 |
+
return f"❌ Error processing file: {str(e)}", None, []
|
| 270 |
+
|
| 271 |
+
|
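Single files ride the same pipeline as repositories: they are validated, wrapped in a one-entry ZIP, and handed to the orchestrator unchanged. A short sketch of that flow; the path is illustrative:

# Illustrative only; 'legacy_script.py' stands in for a real upload.
message, zip_name, versions = process_single_file("legacy_script.py")
if zip_name:
    print(message)   # validation summary shown in the UI
    print(zip_name)  # temporary .zip later fed to modernize_code()
else:
    print("Validation failed:", message)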
def detect_languages_from_zip(zip_path):
    """
    Detect languages from files in a ZIP archive.

    Args:
        zip_path: Path to ZIP file

    Returns:
        Tuple of (language summary, suggested versions)
    """
    try:
        file_paths = []
        with zipfile.ZipFile(zip_path, 'r') as zipf:
            file_paths = [name for name in zipf.namelist() if not name.endswith('/')]

        primary_language, suggested_versions = detect_languages_from_files(file_paths)

        if not primary_language:
            return "Multiple file types detected", []

        language_names = {
            'python': 'Python',
            'java': 'Java',
            'javascript': 'JavaScript',
            'typescript': 'TypeScript'
        }

        return f"Primary language: {language_names.get(primary_language, 'Unknown')}", suggested_versions

    except Exception as e:
        logger.error(f"Error detecting languages from ZIP: {e}")
        return "Could not detect language", []


def clone_github_repo(github_url):
    """
    Clone a GitHub repository (shallow) and show a preview.

    Args:
        github_url: GitHub repository URL

    Returns:
        Tuple of (status message, cloned repo ZIP path, status visibility update, suggested versions)
    """
    if not github_url or not github_url.strip():
        return "❌ Please enter a GitHub repository URL", None, gr.update(visible=True), []

    try:
        import subprocess

        # Clean URL (remove .git if present)
        github_url = github_url.strip().rstrip('/')
        if github_url.endswith('.git'):
            github_url = github_url[:-4]

        # Create temp directory for clone
        temp_dir = tempfile.mkdtemp(prefix="github_clone_")

        # Clone repository (shallow clone keeps transfers small)
        result = subprocess.run(
            ["git", "clone", "--depth", "1", github_url, temp_dir],
            capture_output=True,
            text=True,
            timeout=300
        )

        if result.returncode != 0:
            error_msg = result.stderr if result.stderr else "Unknown error"
            return f"❌ Failed to clone repository:\n{error_msg}", None, gr.update(visible=True), []

        # Collect supported code files (skipping the .git directory)
        file_count = 0
        code_files = []
        for root, dirs, files in os.walk(temp_dir):
            if '.git' in root:
                continue
            for file in files:
                file_path = os.path.join(root, file)
                rel_path = os.path.relpath(file_path, temp_dir)
                ext = os.path.splitext(file)[1].lower()
                if ext in SUPPORTED_EXTENSIONS:
                    file_count += 1
                    code_files.append(rel_path)

        # Create ZIP from cloned repo
        zip_path = tempfile.NamedTemporaryFile(delete=False, suffix='.zip')
        with zipfile.ZipFile(zip_path.name, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for root, dirs, files in os.walk(temp_dir):
                # Skip .git directory
                if '.git' in root:
                    continue
                for file in files:
                    file_path = os.path.join(root, file)
                    arcname = os.path.relpath(file_path, temp_dir)
                    zipf.write(file_path, arcname)

        # Detect languages from the collected code files
        primary_language, suggested_versions = detect_languages_from_files(code_files)

        language_names = {
            'python': 'Python',
            'java': 'Java',
            'javascript': 'JavaScript',
            'typescript': 'TypeScript'
        }

        # Generate preview message with all files
        status = f"""✅ Repository cloned successfully!

📁 Repository: {github_url.split('/')[-1]}
📊 Code files found: {file_count}
🔤 Primary language: {language_names.get(primary_language, 'Mixed')}

📝 Loaded files ({file_count} total):
"""
        # Show all files, not just the first 15
        for f in code_files:
            status += f"  • {f}\n"

        status += "\n🎯 Suggested target versions updated in dropdown"
        status += "\n✨ Ready to modernize! Click 'Start Modernization' button above."

        return status, zip_path.name, gr.update(visible=True), suggested_versions

    except subprocess.TimeoutExpired:
        return "❌ Clone timeout (>5 minutes). Repository might be too large.", None, gr.update(visible=True), []
    except Exception as e:
        return f"❌ Error cloning from GitHub: {str(e)}", None, gr.update(visible=True), []
def modernize_code(repo_file, target_version, create_pr, repo_url, github_token, cloned_repo_path, single_file_path, progress=gr.Progress()):
    """
    Main generator to process an uploaded repository.

    Args:
        repo_file: Uploaded ZIP file containing the repository
        target_version: Target language/framework version
        create_pr: Whether to create GitHub PR
        repo_url: GitHub repository URL for PR
        github_token: GitHub personal access token for PR creation
        cloned_repo_path: Path to cloned repo ZIP (if using GitHub clone)
        single_file_path: Path to single file ZIP (if using single file upload)
        progress: Gradio progress tracker

    Yields:
        Tuple of (status message, modernized ZIP update, tests ZIP update, report update)
    """
    logger.info(f"modernize_code called with: repo_file={repo_file}, single_file_path={single_file_path}, cloned_repo_path={cloned_repo_path}")

    # Priority: single file > cloned repo > uploaded file
    if single_file_path:
        logger.info(f"Single file path detected: {single_file_path}")
        # Lightweight stand-in exposing .name, mirroring Gradio's file object
        repo_file = type('obj', (object,), {'name': single_file_path})()
        logger.info(f"Using single file path: {single_file_path}")
    elif cloned_repo_path:
        logger.info(f"Cloned repo path detected: {cloned_repo_path}")
        repo_file = type('obj', (object,), {'name': cloned_repo_path})()
        logger.info(f"Using cloned repo path: {cloned_repo_path}")
    else:
        logger.info("Using uploaded ZIP file")

    # Check if any valid input source is provided
    if repo_file is None:
        logger.error("No input source provided")
        # This is a generator, so the error state must be yielded, not returned
        yield "❌ Please upload a repository ZIP file, single file, or clone from GitHub.", gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
        return

    logger.info(f"Processing with file: {repo_file.name}")

    try:
        file_path = repo_file.name
        file_size = os.path.getsize(file_path)

        # Initial status
        status = f"""✅ Processing started!

📁 File: {Path(file_path).name}
📊 Size: {file_size / 1024:.2f} KB
🎯 Target: {target_version}

"""
        progress(0.05, desc="Starting...")
        yield status, gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)  # Hide download buttons initially

        # Create a callback to update progress from orchestrator
        current_status = [status]  # Use list to allow modification in nested function

        def progress_callback(phase, message):
            """Callback to update progress from orchestrator."""
            phase_progress = {
                "Phase 1": 0.15,
                "Phase 2": 0.30,
                "Phase 3": 0.45,
                "Phase 4": 0.65,
                "Phase 5": 0.85
            }
            prog_value = phase_progress.get(phase, 0.5)
            progress(prog_value, desc=f"{phase}: {message}")
            current_status[0] += f"⏳ {phase}: {message}\n"

        # Run orchestrator with callback
        progress(0.1, desc="Initializing workflow...")

        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

        results = loop.run_until_complete(
            orchestrator.modernize_repository(
                repo_path=file_path,
                target_version=target_version,
                create_pr=create_pr,
                repo_url=repo_url if create_pr else None,
                github_token=github_token if github_token and github_token.strip() else None,
                progress_callback=progress_callback
            )
        )

        loop.close()

        progress(0.95, desc="Preparing downloads...")
        status = current_status[0]

        # Prepare download files
        modernized_zip = None
        tests_zip = None
        report_file = None

        if results.get('output'):
            import time

            # Create timestamp for file naming
            timestamp = time.strftime("%Y%m%d_%H%M%S")

            output_dir = Path(results['output']['modernized_files'])

            # Get list of files that were actually transformed in this run
            transformed_files = []
            if results.get('phases', {}).get('transformation'):
                # Extract file paths from transformation results
                for t in results.get('transformations', []):
                    if 'file_path' in t:
                        transformed_files.append(Path(t['file_path']).name)

            # Create ZIP of modernized files - ONLY current run files
            if output_dir.exists() and transformed_files:
                modernized_zip = tempfile.NamedTemporaryFile(
                    delete=False,
                    suffix='.zip',
                    prefix=f'modernized_code_{timestamp}_'
                )
                with zipfile.ZipFile(modernized_zip.name, 'w', zipfile.ZIP_DEFLATED) as zipf:
                    # Only include files from current transformation
                    for file in output_dir.iterdir():
                        if file.is_file() and file.name in transformed_files:
                            zipf.write(file, file.name)
                modernized_zip.close()
            else:
                modernized_zip = None

            # Create ZIP of test files - ONLY current run files
            tests_dir = Path(results['output']['test_files'])
            if tests_dir.exists() and transformed_files:
                tests_zip = tempfile.NamedTemporaryFile(
                    delete=False,
                    suffix='.zip',
                    prefix=f'test_files_{timestamp}_'
                )
                with zipfile.ZipFile(tests_zip.name, 'w', zipfile.ZIP_DEFLATED) as zipf:
                    # Only include test files from current transformation
                    for file in tests_dir.iterdir():
                        if file.is_file():
                            # Check if this test file corresponds to a transformed file
                            test_base = file.name.replace('test_', '')
                            if test_base in transformed_files:
                                zipf.write(file, file.name)
                tests_zip.close()
            else:
                tests_zip = None

            # Create report file with UTF-8 encoding
            report_file = tempfile.NamedTemporaryFile(
                delete=False,
                suffix='.txt',
                prefix=f'modernization_report_{timestamp}_',
                mode='w',
                encoding='utf-8'
            )
            report_content = orchestrator.generate_report(results)
            report_file.write(report_content)
            report_file.close()

        # Generate final report
        if results['success']:
            status += "\n" + "=" * 60 + "\n"
            status += "✅ MODERNIZATION COMPLETE!\n"
            status += "=" * 60 + "\n\n"

            stats = results.get('statistics', {})
            status += "📊 **Statistics:**\n"
            status += f"  • Total files: {stats.get('total_files', 0)}\n"
            status += f"  • Files modernized: {stats.get('files_modernized', 0)}\n"
            status += f"  • Tests generated: {stats.get('tests_generated', 0)}\n"
            status += f"  • Test pass rate: {stats.get('test_pass_rate', 0):.1f}%\n"
            # Only show coverage if it's greater than 0
            if stats.get('average_coverage', 0) > 0:
                status += f"  • Code coverage: {stats.get('average_coverage', 0):.1f}%\n"
            status += "\n"

            # Phase details
            phases = results.get('phases', {})

            if 'classification' in phases:
                c = phases['classification']
                status += "📋 **Classification:**\n"
                status += f"  • High priority: {c.get('modernize_high', 0)} files\n"
                status += f"  • Low priority: {c.get('modernize_low', 0)} files\n"
                status += f"  • Skip: {c.get('skip', 0)} files\n\n"

            if 'search' in phases:
                s = phases['search']
                status += "🔍 **Semantic Search:**\n"
                status += f"  • Indexed files: {s.get('indexed_files', 0)}\n"
                status += f"  • Pattern groups: {s.get('pattern_groups', 0)}\n\n"

            if 'validation' in phases:
                v = phases['validation']
                status += "✅ **Validation:**\n"
                status += f"  • Tests run: {v.get('total_tests', 0)}\n"
                status += f"  • Tests passed: {v.get('tests_passed', 0)}\n"
                status += f"  • Tests failed: {v.get('tests_failed', 0)}\n"
                status += f"  • Pass rate: {v.get('pass_rate', 0):.1f}%\n"

                # Show execution mode
                exec_mode = v.get('execution_mode', 'unknown')
                if exec_mode == 'modal':
                    status += "  • Execution: 🚀 Modal (cloud)\n\n"
                elif exec_mode == 'local':
                    status += "  • Execution: 💻 Local\n\n"
                else:
                    status += "\n"

            if 'github_pr' in phases:
                pr = phases['github_pr']
                if pr.get('success'):
                    status += "🔗 **GitHub PR:**\n"
                    status += f"  • PR URL: {pr.get('pr_url', 'N/A')}\n"
                    status += f"  • PR Number: #{pr.get('pr_number', 0)}\n"
                    status += f"  • Branch: {pr.get('branch', 'N/A')}\n\n"
                else:
                    status += f"⚠️ **GitHub PR:** {pr.get('error', 'Failed')}\n\n"

            if results.get('errors'):
                status += "⚠️ **Warnings:**\n"
                for error in results['errors'][:5]:
                    status += f"  • {error}\n"

            # Add output locations
            if results.get('output'):
                status += "\n📁 **Output Locations:**\n"
                status += f"  • Modernized files: {results['output']['modernized_files']}\n"
                status += f"  • Test files: {results['output']['test_files']}\n"
                status += f"  • Original files: {results['output']['original_files']}\n"

            status += "\n" + "=" * 60 + "\n"
            status += "🎉 Ready for review and deployment!\n"
            status += "📥 Download files using the buttons below.\n"

        else:
            status += "\n❌ MODERNIZATION FAILED\n\n"
            status += "Errors:\n"
            for error in results.get('errors', []):
                status += f"  • {error}\n"

        progress(1.0, desc="Complete!")

        # Final yield with status and download files (make visible)
        yield (
            status,
            gr.update(value=modernized_zip.name, visible=True) if modernized_zip else gr.update(visible=False),
            gr.update(value=tests_zip.name, visible=True) if tests_zip else gr.update(visible=False),
            gr.update(value=report_file.name, visible=True) if report_file else gr.update(visible=False)
        )

    except Exception as e:
        logger.error(f"Error in modernize_code: {e}", exc_info=True)
        progress(1.0, desc="Error occurred")
        yield f"❌ Error: {str(e)}\n\nPlease check logs for details.", gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
| 677 |
+
# Custom CSS for better styling
|
| 678 |
+
custom_css = """
|
| 679 |
+
.gradio-container {
|
| 680 |
+
font-family: 'Inter', sans-serif;
|
| 681 |
+
}
|
| 682 |
+
.header {
|
| 683 |
+
text-align: center;
|
| 684 |
+
padding: 20px;
|
| 685 |
+
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
|
| 686 |
+
color: white;
|
| 687 |
+
border-radius: 10px;
|
| 688 |
+
margin-bottom: 20px;
|
| 689 |
+
}
|
| 690 |
+
/* Style token input to match other inputs */
|
| 691 |
+
.token-input input {
|
| 692 |
+
background-color: var(--input-background-fill) !important;
|
| 693 |
+
border: 1px solid var(--input-border-color) !important;
|
| 694 |
+
}
|
| 695 |
+
"""
|
| 696 |
+
|
| 697 |
+
# Get execution mode info for display
|
| 698 |
+
from src.sandbox.config import EXECUTION_MODE, IS_HUGGINGFACE, MODAL_CONFIGURED
|
| 699 |
+
|
| 700 |
+
exec_mode_display = ""
|
| 701 |
+
if IS_HUGGINGFACE:
|
| 702 |
+
if MODAL_CONFIGURED:
|
| 703 |
+
exec_mode_display = "🚀 Running on Hugging Face Spaces with Modal (cloud execution)"
|
| 704 |
+
else:
|
| 705 |
+
exec_mode_display = "⚠️ Running on Hugging Face but Modal not configured - tests will fail!"
|
| 706 |
+
elif EXECUTION_MODE == "modal":
|
| 707 |
+
exec_mode_display = "🚀 Modal execution enabled (cloud)"
|
| 708 |
+
elif EXECUTION_MODE == "local":
|
| 709 |
+
exec_mode_display = "💻 Local execution mode"
|
| 710 |
+
else:
|
| 711 |
+
exec_mode_display = "" # Don't show anything for auto mode
|
| 712 |
+
|
| 713 |
+
# Build Gradio interface
|
| 714 |
+
with gr.Blocks(title="Legacy Code Modernizer") as app:
|
| 715 |
+
# Add custom CSS via HTML
|
| 716 |
+
gr.HTML(f"""
|
| 717 |
+
<style>
|
| 718 |
+
{custom_css}
|
| 719 |
+
</style>
|
| 720 |
+
<div class="header">
|
| 721 |
+
<h1>🤖 Legacy Code Modernizer</h1>
|
| 722 |
+
<p>AI-powered code modernization for Python, Java, and JavaScript/TypeScript</p>
|
| 723 |
+
<p style="font-size: 12px; opacity: 0.8; margin-top: 8px;">{exec_mode_display}</p>
|
| 724 |
+
</div>
|
| 725 |
+
""")
|
| 726 |
+
|
| 727 |
+
gr.Markdown("""
|
| 728 |
+
### Modernization Workflow:
|
| 729 |
+
1. **Discovery & Classification**: Analyze codebase structure and prioritize files
|
| 730 |
+
2. **Semantic Search**: Group similar patterns using vector-based search
|
| 731 |
+
3. **Code Transformation**: Apply AI-powered modernization patterns
|
| 732 |
+
4. **Testing & Validation**: Generate tests and validate in secure sandbox
|
| 733 |
+
5. **GitHub Integration**: Create pull requests with comprehensive documentation
|
| 734 |
+
|
| 735 |
+
**Powered by**: Google Gemini, Nebius AI, LlamaIndex, Chroma, Modal, MCP Protocol
|
| 736 |
+
""")
|
| 737 |
+
|
| 738 |
+
with gr.Row():
|
| 739 |
+
with gr.Column(scale=2):
|
| 740 |
+
# Input method selection
|
| 741 |
+
with gr.Tabs() as input_tabs:
|
| 742 |
+
with gr.Tab("📄 Single File"):
|
| 743 |
+
single_file_input = gr.File(
|
| 744 |
+
label="Upload Single Code File",
|
| 745 |
+
file_types=[
|
| 746 |
+
".py", ".pyw", ".pyx",
|
| 747 |
+
".java",
|
| 748 |
+
".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs"
|
| 749 |
+
],
|
| 750 |
+
type="filepath"
|
| 751 |
+
)
|
| 752 |
+
|
| 753 |
+
file_status = gr.Textbox(
|
| 754 |
+
label="File Status",
|
| 755 |
+
lines=8,
|
| 756 |
+
interactive=False,
|
| 757 |
+
visible=True
|
| 758 |
+
)
|
| 759 |
+
|
| 760 |
+
single_file_path = gr.State(value=None)
|
| 761 |
+
|
| 762 |
+
gr.Markdown("""
|
| 763 |
+
**Supported Languages**:
|
| 764 |
+
- Python (.py, .pyw, .pyx) - pytest with coverage
|
| 765 |
+
- Java (.java) - Maven + JUnit 5 + JaCoCo
|
| 766 |
+
- JavaScript (.js, .jsx, .mjs, .cjs) - Jest with coverage
|
| 767 |
+
- TypeScript (.ts, .tsx) - Jest with coverage
|
| 768 |
+
|
| 769 |
+
**Max file size**: 10 MB per file
|
| 770 |
+
|
| 771 |
+
**Note**: All supported languages include code transformation, test generation, and secure Modal sandbox execution with automatic dependency management.
|
| 772 |
+
""")
|
| 773 |
+
|
| 774 |
+
with gr.Tab("📁 Upload ZIP"):
|
| 775 |
+
file_input = gr.File(
|
| 776 |
+
label="Upload Repository (.zip)",
|
| 777 |
+
file_types=[".zip"],
|
| 778 |
+
type="filepath"
|
| 779 |
+
)
|
| 780 |
+
|
| 781 |
+
with gr.Tab("🔗 Clone from GitHub"):
|
| 782 |
+
github_repo_url = gr.Textbox(
|
| 783 |
+
label="GitHub Repository URL",
|
| 784 |
+
placeholder="https://github.com/owner/repo",
|
| 785 |
+
info="Enter full GitHub URL to clone (without .git extension)"
|
| 786 |
+
)
|
| 787 |
+
|
| 788 |
+
clone_btn = gr.Button(
|
| 789 |
+
"📥 Load Repository",
|
| 790 |
+
variant="secondary",
|
| 791 |
+
size="sm"
|
| 792 |
+
)
|
| 793 |
+
|
| 794 |
+
clone_status = gr.Textbox(
|
| 795 |
+
label="Repository Files",
|
| 796 |
+
lines=15,
|
| 797 |
+
interactive=False,
|
| 798 |
+
visible=False
|
| 799 |
+
)
|
| 800 |
+
|
| 801 |
+
cloned_repo_path = gr.State(value=None)
|
| 802 |
+
|
| 803 |
+
gr.Markdown("**Note**: Requires git to be installed on your system")
|
| 804 |
+
|
| 805 |
+
# Build comprehensive target version list
|
| 806 |
+
all_target_versions = []
|
| 807 |
+
for versions in TARGET_VERSIONS.values():
|
| 808 |
+
all_target_versions.extend(versions)
|
| 809 |
+
all_target_versions.extend(FRAMEWORK_VERSIONS)
|
| 810 |
+
all_target_versions = sorted(set(all_target_versions))
|
| 811 |
+
|
| 812 |
+
target_version = gr.Dropdown(
|
| 813 |
+
choices=all_target_versions,
|
| 814 |
+
label="🎯 Target Version (auto-detected from files)",
|
| 815 |
+
value="Python 3.14",
|
| 816 |
+
info="Automatically updated based on uploaded files",
|
| 817 |
+
allow_custom_value=False
|
| 818 |
+
)
|
| 819 |
+
|
| 820 |
+
# Add option to select from full list
|
| 821 |
+
with gr.Accordion("📋 Browse All Versions", open=False):
|
| 822 |
+
gr.Markdown("""
|
| 823 |
+
**Auto-detection incorrect?** Select from the full list below:
|
| 824 |
+
|
| 825 |
+
**Python**: 3.14, 3.13, 3.12, 3.11, 3.10
|
| 826 |
+
**Java**: 25 LTS, 23, 21 LTS, 17 LTS
|
| 827 |
+
**JavaScript**: ES2025, ES2024, Node.js 25, 24 LTS, 22 LTS
|
| 828 |
+
**TypeScript**: 5.9, 5.8, 5.7, 5.6
|
| 829 |
+
|
| 830 |
+
**Frameworks**: React 19, Angular 21, Vue 3.5, Django 5.2 LTS, Spring Boot 4.0, Laravel 12, Rails 8.1, Next.js 16, FastAPI 0.122, and more
|
| 831 |
+
|
| 832 |
+
Simply select your desired version from the dropdown above.
|
| 833 |
+
""")
|
| 834 |
+
|
| 835 |
+
with gr.Accordion("⚙️ Advanced Options", open=False):
|
| 836 |
+
create_pr = gr.Checkbox(
|
| 837 |
+
label="Create GitHub PR",
|
| 838 |
+
value=False,
|
| 839 |
+
info="Automatically create pull request with modernized code"
|
| 840 |
+
)
|
| 841 |
+
|
| 842 |
+
repo_url = gr.Textbox(
|
| 843 |
+
label="GitHub Repository URL for PR",
|
| 844 |
+
placeholder="owner/repo (e.g., myorg/myproject)",
|
| 845 |
+
info="Required if creating PR"
|
| 846 |
+
)
|
| 847 |
+
|
| 848 |
+
github_token_input = gr.Textbox(
|
| 849 |
+
label="GitHub Personal Access Token",
|
| 850 |
+
placeholder="ghp_xxxxxxxxxxxxxxxxxxxx",
|
| 851 |
+
type="password",
|
| 852 |
+
info="Required for PR creation. Leave empty to use token from .env file",
|
| 853 |
+
container=True,
|
| 854 |
+
elem_classes=["token-input"]
|
| 855 |
+
)
|
| 856 |
+
|
| 857 |
+
process_btn = gr.Button(
|
| 858 |
+
"🚀 Start Modernization",
|
| 859 |
+
variant="primary",
|
| 860 |
+
size="lg"
|
| 861 |
+
)
|
| 862 |
+
|
| 863 |
+
with gr.Column(scale=3):
|
| 864 |
+
output = gr.Textbox(
|
| 865 |
+
label="📊 Status & Progress",
|
| 866 |
+
lines=25,
|
| 867 |
+
max_lines=35
|
| 868 |
+
)
|
| 869 |
+
|
| 870 |
+
# Download section (separate row, below main interface)
|
| 871 |
+
with gr.Row():
|
| 872 |
+
download_modernized = gr.File(
|
| 873 |
+
label="📦 Download Modernized Code",
|
| 874 |
+
visible=False
|
| 875 |
+
)
|
| 876 |
+
download_tests = gr.File(
|
| 877 |
+
label="🧪 Download Test Files",
|
| 878 |
+
visible=False
|
| 879 |
+
)
|
| 880 |
+
download_report = gr.File(
|
| 881 |
+
label="📄 Download Report",
|
| 882 |
+
visible=False
|
| 883 |
+
)
|
| 884 |
+
|
| 885 |
+
with gr.Accordion("📖 Features & Capabilities", open=False):
|
| 886 |
+
gr.Markdown("""
|
| 887 |
+
### Core Features:
|
| 888 |
+
|
| 889 |
+
**🔍 Semantic Code Search**
|
| 890 |
+
- Vector-based similarity search using LlamaIndex and Chroma
|
| 891 |
+
- Automatic pattern grouping for efficient refactoring
|
| 892 |
+
- Bulk code transformation capabilities
|
| 893 |
+
|
| 894 |
+
**🤖 AI-Powered Analysis**
|
| 895 |
+
- Powered by Google Gemini and Nebius AI models
|
| 896 |
+
- Large context window for comprehensive code understanding
|
| 897 |
+
- Multi-language support (Python, Java, JavaScript, TypeScript)
|
| 898 |
+
|
| 899 |
+
**🧪 Automated Testing**
|
| 900 |
+
- Isolated test execution in Modal sandbox
|
| 901 |
+
- Secure environment with no network access
|
| 902 |
+
- Performance benchmarking and coverage reporting
|
| 903 |
+
|
| 904 |
+
**🔗 GitHub Integration**
|
| 905 |
+
- Automated pull request creation via MCP Protocol
|
| 906 |
+
- Comprehensive documentation generation
|
| 907 |
+
- Deployment checklists and rollback plans
|
| 908 |
+
|
| 909 |
+
**📊 Quality Assurance**
|
| 910 |
+
- High test pass rates with comprehensive coverage
|
| 911 |
+
- Behavioral equivalence testing
|
| 912 |
+
- Automated validation before deployment
|
| 913 |
+
""")
|
| 914 |
+
|
| 915 |
+
with gr.Accordion("🎯 Supported Languages & Versions", open=False):
|
| 916 |
+
gr.Markdown("""
|
| 917 |
+
### Supported Languages (Updated November 2025):
|
| 918 |
+
|
| 919 |
+
**Python**
|
| 920 |
+
- Versions: 3.9, 3.10, 3.11, 3.12, 3.13
|
| 921 |
+
- Frameworks: Django 5.1, Flask 3.1, FastAPI 0.115
|
| 922 |
+
- Testing: pytest with coverage
|
| 923 |
+
|
| 924 |
+
**Java**
|
| 925 |
+
- Versions: Java 11 LTS, 17 LTS, 21 LTS, 23
|
| 926 |
+
- Frameworks: Spring Boot 3.4
|
| 927 |
+
- Testing: Maven + JUnit 5 + JaCoCo
|
| 928 |
+
|
| 929 |
+
**JavaScript**
|
| 930 |
+
- Standards: ES2023, ES2024, ES2025
|
| 931 |
+
- Runtimes: Node.js 20 LTS, 22 LTS, 23
|
| 932 |
+
- Frameworks: React 19, Angular 19, Vue 3.5, Express 5.0, Next.js 15
|
| 933 |
+
- Testing: Jest with coverage
|
| 934 |
+
|
| 935 |
+
**TypeScript**
|
| 936 |
+
- Versions: 5.4, 5.5, 5.6, 5.7
|
| 937 |
+
- Frameworks: React 19, Angular 19, Vue 3.5, Next.js 15
|
| 938 |
+
- Testing: Jest with ts-jest
|
| 939 |
+
""")
|
| 940 |
+
|
| 941 |
+
# State for suggested versions
|
| 942 |
+
suggested_versions_state = gr.State(value=[])
|
| 943 |
+
|
| 944 |
+
# Event handlers
|
| 945 |
+
# Handle single file validation (automatic on upload)
|
| 946 |
+
def validate_and_show(file_path):
|
| 947 |
+
"""Wrapper to validate file and show status."""
|
| 948 |
+
logger.info(f"validate_and_show called with file_path: {file_path}")
|
| 949 |
+
if not file_path:
|
| 950 |
+
logger.warning("No file path provided to validate_and_show")
|
| 951 |
+
return "📄 Upload a code file to get started", None, gr.update(), []
|
| 952 |
+
|
| 953 |
+
try:
|
| 954 |
+
message, zip_path, suggested_versions = process_single_file(file_path)
|
| 955 |
+
logger.info(f"Validation result: message='{message}', zip_path='{zip_path}', versions={len(suggested_versions)}")
|
| 956 |
+
|
| 957 |
+
# Update dropdown with suggested versions
|
| 958 |
+
if suggested_versions:
|
| 959 |
+
return message, zip_path, gr.update(choices=suggested_versions, value=suggested_versions[0]), suggested_versions
|
| 960 |
+
else:
|
| 961 |
+
return message, zip_path, gr.update(), []
|
| 962 |
+
except Exception as e:
|
| 963 |
+
logger.error(f"Error in validate_and_show: {e}", exc_info=True)
|
| 964 |
+
return f"❌ Error: {str(e)}", None, gr.update(), []
|
| 965 |
+
|
| 966 |
+
# Handle ZIP file upload
|
| 967 |
+
def handle_zip_upload(file_path):
|
| 968 |
+
"""Handle ZIP file upload and detect languages."""
|
| 969 |
+
if not file_path:
|
| 970 |
+
return gr.update(), []
|
| 971 |
+
|
| 972 |
+
try:
|
| 973 |
+
lang_summary, suggested_versions = detect_languages_from_zip(file_path)
|
| 974 |
+
logger.info(f"ZIP upload: {lang_summary}, {len(suggested_versions)} versions")
|
| 975 |
+
|
| 976 |
+
if suggested_versions:
|
| 977 |
+
return gr.update(choices=suggested_versions, value=suggested_versions[0]), suggested_versions
|
| 978 |
+
else:
|
| 979 |
+
return gr.update(), []
|
| 980 |
+
except Exception as e:
|
| 981 |
+
logger.error(f"Error handling ZIP upload: {e}")
|
| 982 |
+
return gr.update(), []
|
| 983 |
+
|
| 984 |
+
# Auto-validate on file upload
|
| 985 |
+
single_file_input.change(
|
| 986 |
+
fn=validate_and_show,
|
| 987 |
+
inputs=[single_file_input],
|
| 988 |
+
outputs=[file_status, single_file_path, target_version, suggested_versions_state],
|
| 989 |
+
show_progress=True
|
| 990 |
+
)
|
| 991 |
+
|
| 992 |
+
# Auto-detect on ZIP upload
|
| 993 |
+
file_input.change(
|
| 994 |
+
fn=handle_zip_upload,
|
| 995 |
+
inputs=[file_input],
|
| 996 |
+
outputs=[target_version, suggested_versions_state],
|
| 997 |
+
show_progress=False
|
| 998 |
+
)
|
| 999 |
+
|
| 1000 |
+
# Handle GitHub clone button
|
| 1001 |
+
def handle_github_clone(github_url):
|
| 1002 |
+
"""Wrapper for GitHub clone with version detection."""
|
| 1003 |
+
status, zip_path, visibility, suggested_versions = clone_github_repo(github_url)
|
| 1004 |
+
|
| 1005 |
+
if suggested_versions:
|
| 1006 |
+
return status, zip_path, visibility, gr.update(choices=suggested_versions, value=suggested_versions[0]), suggested_versions
|
| 1007 |
+
else:
|
| 1008 |
+
return status, zip_path, visibility, gr.update(), []
|
| 1009 |
+
|
| 1010 |
+
clone_btn.click(
|
| 1011 |
+
fn=handle_github_clone,
|
| 1012 |
+
inputs=[github_repo_url],
|
| 1013 |
+
outputs=[clone_status, cloned_repo_path, clone_status, target_version, suggested_versions_state],
|
| 1014 |
+
show_progress=True
|
| 1015 |
+
)
|
| 1016 |
+
|
| 1017 |
+
# Handle modernization
|
| 1018 |
+
process_btn.click(
|
| 1019 |
+
fn=modernize_code,
|
| 1020 |
+
inputs=[file_input, target_version, create_pr, repo_url, github_token_input, cloned_repo_path, single_file_path],
|
| 1021 |
+
outputs=[output, download_modernized, download_tests, download_report],
|
| 1022 |
+
show_progress="full"
|
| 1023 |
+
)
|
| 1024 |
+
|
| 1025 |
+
# Examples
|
| 1026 |
+
gr.Examples(
|
| 1027 |
+
examples=[
|
| 1028 |
+
[None, "Python 3.12", False, "", "", None, None],
|
| 1029 |
+
[None, "Java 21 LTS", False, "", "", None, None],
|
| 1030 |
+
[None, "React 18 (Hooks)", True, "myorg/myproject", "", None, None]
|
| 1031 |
+
],
|
| 1032 |
+
inputs=[file_input, target_version, create_pr, repo_url, github_token_input, cloned_repo_path, single_file_path],
|
| 1033 |
+
label="📝 Example Configurations"
|
| 1034 |
+
)
|
| 1035 |
+
|
| 1036 |
+
|
| 1037 |
+
|
| 1038 |
+
if __name__ == "__main__":
|
| 1039 |
+
app.launch(
|
| 1040 |
+
server_name="0.0.0.0",
|
| 1041 |
+
server_port=7860,
|
| 1042 |
+
share=False,
|
| 1043 |
+
show_error=True,
|
| 1044 |
+
css=custom_css
|
| 1045 |
+
)
|
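For reference, a minimal sketch of the streaming-status pattern `app.py` relies on above: a generator event handler yields intermediate status text, then a final tuple of `gr.update(...)` objects that reveal the hidden download component. Everything below (the `fake_pipeline` function, the placeholder file name) is illustrative and not part of this commit.

import gradio as gr

def fake_pipeline():
    # Intermediate yield: update status, keep the download hidden
    yield "Working...", gr.update(visible=False)
    # The real app builds ZIP archives here; the sketch writes a placeholder file
    with open("result.txt", "w") as f:
        f.write("placeholder artifact")
    # Final yield: set the file value and make the component visible
    yield "Done! Download below.", gr.update(value="result.txt", visible=True)

with gr.Blocks() as demo:
    btn = gr.Button("Run")
    status = gr.Textbox(label="Status")
    download = gr.File(label="Result", visible=False)
    btn.click(fn=fake_pipeline, inputs=None, outputs=[status, download])

if __name__ == "__main__":
    demo.launch()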
src/utils/__init__.py
ADDED
@@ -0,0 +1 @@
"""Utility functions for file handling and processing."""
src/utils/file_handler.py
ADDED
@@ -0,0 +1,166 @@
"""File handling utilities for repository processing."""

import zipfile
import os
from pathlib import Path
from typing import List, Set
import shutil


class FileHandler:
    """Handles file extraction and code file discovery."""

    # Supported code file extensions
    CODE_EXTENSIONS: Set[str] = {
        '.py', '.java', '.js', '.ts', '.jsx', '.tsx',
        '.php', '.rb', '.go', '.rs', '.cpp', '.c', '.h',
        '.cs', '.swift', '.kt', '.scala', '.pl', '.r'
    }

    # Files/directories to exclude
    EXCLUDE_PATTERNS: Set[str] = {
        '__pycache__', '.git', '.svn', 'node_modules',
        'venv', 'env', '.venv', 'dist', 'build',
        '.idea', '.vscode', '.pytest_cache', '.mypy_cache'
    }

    def __init__(self, upload_dir: str = "./uploads"):
        """
        Initialize file handler.

        Args:
            upload_dir: Directory to store uploaded and extracted files
        """
        self.upload_dir = Path(upload_dir)
        self.upload_dir.mkdir(exist_ok=True, parents=True)

    def extract_repo(self, zip_path: str) -> str:
        """
        Extract uploaded repository ZIP file.

        Args:
            zip_path: Path to the ZIP file

        Returns:
            Path to extracted directory

        Raises:
            ValueError: If file is not a valid ZIP
        """
        if not zipfile.is_zipfile(zip_path):
            raise ValueError(f"File {zip_path} is not a valid ZIP file")

        # Create unique extraction directory
        extract_path = self.upload_dir / "extracted"

        # Clean up previous extraction
        if extract_path.exists():
            shutil.rmtree(extract_path)

        extract_path.mkdir(exist_ok=True, parents=True)

        try:
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(extract_path)

            return str(extract_path)

        except Exception as e:
            raise ValueError(f"Error extracting ZIP file: {e}")

    def list_code_files(self, repo_path: str) -> List[str]:
        """
        List all code files in repository.

        Args:
            repo_path: Path to repository directory

        Returns:
            List of relative file paths
        """
        code_files = []
        repo_path = Path(repo_path)

        for root, dirs, files in os.walk(repo_path):
            # Filter out excluded directories
            dirs[:] = [d for d in dirs if d not in self.EXCLUDE_PATTERNS]

            for filename in files:
                file_path = Path(root) / filename

                # Check if it's a code file
                if file_path.suffix in self.CODE_EXTENSIONS:
                    # Get relative path
                    rel_path = file_path.relative_to(repo_path)
                    code_files.append(str(rel_path))

        return sorted(code_files)

    def read_file(self, file_path: str, max_size: int = 1024 * 1024) -> str:
        """
        Read file contents safely.

        Args:
            file_path: Path to file
            max_size: Maximum file size in bytes (default 1MB)

        Returns:
            File contents as string

        Raises:
            ValueError: If file is too large or cannot be read
        """
        file_path = Path(file_path)

        if not file_path.exists():
            raise ValueError(f"File {file_path} does not exist")

        file_size = file_path.stat().st_size
        if file_size > max_size:
            raise ValueError(
                f"File {file_path} is too large ({file_size} bytes). "
                f"Maximum size is {max_size} bytes."
            )

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except UnicodeDecodeError:
            # Try with different encoding
            try:
                with open(file_path, 'r', encoding='latin-1') as f:
                    return f.read()
            except Exception as e:
                raise ValueError(f"Cannot read file {file_path}: {e}")

    def get_file_info(self, file_path: str) -> dict:
        """
        Get information about a file.

        Args:
            file_path: Path to file

        Returns:
            Dictionary with file information
        """
        file_path = Path(file_path)

        if not file_path.exists():
            return {"exists": False}

        stat = file_path.stat()

        return {
            "exists": True,
            "name": file_path.name,
            "extension": file_path.suffix,
            "size_bytes": stat.st_size,
            "size_kb": round(stat.st_size / 1024, 2),
            "is_code": file_path.suffix in self.CODE_EXTENSIONS
        }

    def cleanup(self):
        """Clean up temporary files and directories."""
        if self.upload_dir.exists():
            shutil.rmtree(self.upload_dir)
        self.upload_dir.mkdir(exist_ok=True, parents=True)
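For reference, a minimal usage sketch of `FileHandler` as defined above. The archive name `repo.zip` is hypothetical; the size guard mirrors the 1 MB default of `read_file`.

from src.utils.file_handler import FileHandler

handler = FileHandler(upload_dir="./uploads")    # default shown in __init__
repo_dir = handler.extract_repo("repo.zip")      # raises ValueError on a bad archive
for rel_path in handler.list_code_files(repo_dir):
    info = handler.get_file_info(f"{repo_dir}/{rel_path}")
    if info["exists"] and info["size_kb"] < 1024:  # stay under read_file's 1 MB default
        source = handler.read_file(f"{repo_dir}/{rel_path}")
handler.cleanup()                                # wipe and recreate the upload dir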
src/workflow/__init__.py
ADDED
@@ -0,0 +1,5 @@
"""Workflow orchestration module."""

from src.workflow.orchestrator import ModernizationOrchestrator

__all__ = ['ModernizationOrchestrator']
|
src/workflow/orchestrator.py
ADDED
|
@@ -0,0 +1,732 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Workflow Orchestrator - Integrates all phases into complete pipeline.
|
| 3 |
+
Phase 5: Complete end-to-end workflow with all MCP integrations.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
import logging
|
| 8 |
+
import asyncio
|
| 9 |
+
from typing import Dict, List, Optional
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
# Phase 1-2: Classification
|
| 13 |
+
from src.agents.classifier import CodeClassifier
|
| 14 |
+
from src.agents.pattern_integration import PatternMatcherIntegration
|
| 15 |
+
from src.utils.file_handler import FileHandler
|
| 16 |
+
|
| 17 |
+
# Phase 3: Search
|
| 18 |
+
from src.search.vector_store import CodeSearchEngine
|
| 19 |
+
|
| 20 |
+
# Phase 4: Analysis & Transformation
|
| 21 |
+
from src.agents.analyzer import CodeAnalyzer
|
| 22 |
+
from src.agents.transformer import CodeTransformer
|
| 23 |
+
|
| 24 |
+
# Phase 5: Testing & GitHub
|
| 25 |
+
from src.agents.test_generator import CodeTestGenerator
|
| 26 |
+
from src.sandbox.validator import ModalSandboxValidator
|
| 27 |
+
|
| 28 |
+
# Lazy import to avoid circular dependency
|
| 29 |
+
GitHubMCPClient = None
|
| 30 |
+
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class ModernizationOrchestrator:
|
| 35 |
+
"""
|
| 36 |
+
Orchestrates the complete code modernization workflow.
|
| 37 |
+
Integrates all 5 phases into a seamless pipeline.
|
| 38 |
+
"""
|
| 39 |
+
|
| 40 |
+
def __init__(self, use_intelligent_matcher: bool = True):
|
| 41 |
+
"""Initialize orchestrator with all components."""
|
| 42 |
+
logger.info("Initializing ModernizationOrchestrator")
|
| 43 |
+
|
| 44 |
+
# Phase 1-2 components
|
| 45 |
+
self.use_intelligent_matcher = use_intelligent_matcher
|
| 46 |
+
if use_intelligent_matcher:
|
| 47 |
+
self.pattern_integration = PatternMatcherIntegration(
|
| 48 |
+
use_intelligent_matcher=True,
|
| 49 |
+
cache_dir=".pattern_cache"
|
| 50 |
+
)
|
| 51 |
+
logger.info("Using IntelligentPatternMatcher")
|
| 52 |
+
else:
|
| 53 |
+
self.classifier = CodeClassifier()
|
| 54 |
+
logger.info("Using legacy CodeClassifier")
|
| 55 |
+
|
| 56 |
+
self.file_handler = FileHandler()
|
| 57 |
+
|
| 58 |
+
# Phase 3 components
|
| 59 |
+
self.search_engine = None # Initialized per repo
|
| 60 |
+
|
| 61 |
+
# Phase 4 components
|
| 62 |
+
self.analyzer = CodeAnalyzer()
|
| 63 |
+
self.transformer = CodeTransformer()
|
| 64 |
+
|
| 65 |
+
# Phase 5 components
|
| 66 |
+
self.test_generator = CodeTestGenerator()
|
| 67 |
+
self.validator = ModalSandboxValidator()
|
| 68 |
+
|
| 69 |
+
# Lazy load GitHub client to avoid circular import
|
| 70 |
+
self.github_client = None
|
| 71 |
+
|
| 72 |
+
logger.info("ModernizationOrchestrator initialized successfully")
|
| 73 |
+
|
| 74 |
+
async def modernize_repository(
|
| 75 |
+
self,
|
| 76 |
+
repo_path: str,
|
| 77 |
+
target_version: str = "Python 3.14",
|
| 78 |
+
create_pr: bool = False,
|
| 79 |
+
repo_url: Optional[str] = None,
|
| 80 |
+
github_token: Optional[str] = None,
|
| 81 |
+
progress_callback: Optional[callable] = None
|
| 82 |
+
) -> Dict:
|
| 83 |
+
"""
|
| 84 |
+
Complete modernization workflow for a repository.
|
| 85 |
+
|
| 86 |
+
Args:
|
| 87 |
+
repo_path: Path to repository (ZIP or directory)
|
| 88 |
+
target_version: Target language/framework version
|
| 89 |
+
create_pr: Whether to create GitHub PR
|
| 90 |
+
repo_url: GitHub repository URL (required if create_pr=True)
|
| 91 |
+
github_token: GitHub personal access token (optional, uses .env if not provided)
|
| 92 |
+
progress_callback: Optional callback function for progress updates
|
| 93 |
+
|
| 94 |
+
Returns:
|
| 95 |
+
Dictionary with complete modernization results
|
| 96 |
+
"""
|
| 97 |
+
logger.info(f"Starting modernization for {repo_path}")
|
| 98 |
+
|
| 99 |
+
def update_progress(phase: str, message: str):
|
| 100 |
+
"""Helper to call progress callback if provided."""
|
| 101 |
+
if progress_callback:
|
| 102 |
+
progress_callback(phase, message)
|
| 103 |
+
|
| 104 |
+
results = {
|
| 105 |
+
"success": False,
|
| 106 |
+
"phases": {},
|
| 107 |
+
"statistics": {},
|
| 108 |
+
"errors": []
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
try:
|
| 112 |
+
# Phase 1: Extract and discover files
|
| 113 |
+
logger.info("Phase 1: File discovery")
|
| 114 |
+
update_progress("Phase 1", "Extracting and discovering files...")
|
| 115 |
+
|
| 116 |
+
if repo_path.endswith('.zip'):
|
| 117 |
+
extract_path = self.file_handler.extract_repo(repo_path)
|
| 118 |
+
else:
|
| 119 |
+
extract_path = repo_path
|
| 120 |
+
|
| 121 |
+
files = self.file_handler.list_code_files(extract_path)
|
| 122 |
+
logger.info(f"Discovered {len(files)} code files")
|
| 123 |
+
update_progress("Phase 1", f"Discovered {len(files)} code files")
|
| 124 |
+
|
| 125 |
+
results['phases']['discovery'] = {
|
| 126 |
+
"files_found": len(files),
|
| 127 |
+
"repo_path": extract_path
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
# Phase 2: Classify files
|
| 131 |
+
logger.info("Phase 2: File classification")
|
| 132 |
+
update_progress("Phase 2", "Classifying files with AI pattern detection...")
|
| 133 |
+
|
| 134 |
+
# Read file contents for intelligent matching
|
| 135 |
+
file_contents = {}
|
| 136 |
+
if self.use_intelligent_matcher:
|
| 137 |
+
logger.info("Reading file contents for intelligent pattern matching...")
|
| 138 |
+
for file_path in files[:50]: # Limit to 50 files for demo
|
| 139 |
+
try:
|
| 140 |
+
full_path = os.path.join(extract_path, file_path)
|
| 141 |
+
content = self.file_handler.read_file(full_path)
|
| 142 |
+
if content:
|
| 143 |
+
file_contents[file_path] = content
|
| 144 |
+
except Exception as e:
|
| 145 |
+
logger.warning(f"Could not read {file_path}: {e}")
|
| 146 |
+
|
| 147 |
+
classifications = self.pattern_integration.classify_files(
|
| 148 |
+
list(file_contents.keys()),
|
| 149 |
+
file_contents
|
| 150 |
+
)
|
| 151 |
+
|
| 152 |
+
# Get detailed statistics
|
| 153 |
+
analyses = self.pattern_integration.pattern_matcher.analyze_batch(file_contents)
|
| 154 |
+
stats = self.pattern_integration.generate_statistics(analyses)
|
| 155 |
+
|
| 156 |
+
logger.info(f"Intelligent classification: {stats['modernize_high']} high, "
|
| 157 |
+
f"{stats['modernize_low']} low, {stats['skip']} skip")
|
| 158 |
+
logger.info(f"Detected {stats['patterns_detected']} patterns across {stats['total_files']} files")
|
| 159 |
+
else:
|
| 160 |
+
classifications = self.classifier.classify_files(files)
|
| 161 |
+
stats = None
|
| 162 |
+
|
| 163 |
+
modernize_high = [f for f, c in classifications.items() if c == 'modernize_high']
|
| 164 |
+
modernize_low = [f for f, c in classifications.items() if c == 'modernize_low']
|
| 165 |
+
skip_files = [f for f, c in classifications.items() if c == 'skip']
|
| 166 |
+
|
| 167 |
+
logger.info(f"Classification: {len(modernize_high)} high, {len(modernize_low)} low, {len(skip_files)} skip")
|
| 168 |
+
|
| 169 |
+
results['phases']['classification'] = {
|
| 170 |
+
"modernize_high": len(modernize_high),
|
| 171 |
+
"modernize_low": len(modernize_low),
|
| 172 |
+
"skip": len(skip_files),
|
| 173 |
+
"classifications": classifications,
|
| 174 |
+
"intelligent_stats": stats if self.use_intelligent_matcher else None
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
# Phase 3: Semantic search and pattern grouping
|
| 178 |
+
logger.info("Phase 3: Semantic search")
|
| 179 |
+
update_progress("Phase 3", "Building semantic index with LlamaIndex...")
|
| 180 |
+
|
| 181 |
+
self.search_engine = CodeSearchEngine(persist_dir=None)
|
| 182 |
+
|
| 183 |
+
# Build index for high-priority files
|
| 184 |
+
files_to_modernize = modernize_high + modernize_low
|
| 185 |
+
if files_to_modernize:
|
| 186 |
+
self.search_engine.build_index(extract_path) # Build index from repo
|
| 187 |
+
|
| 188 |
+
# Find pattern groups
|
| 189 |
+
pattern_groups = self._find_pattern_groups(files_to_modernize[:20])
|
| 190 |
+
logger.info(f"Found {len(pattern_groups)} pattern groups")
|
| 191 |
+
|
| 192 |
+
results['phases']['search'] = {
|
| 193 |
+
"indexed_files": min(len(files_to_modernize), 100),
|
| 194 |
+
"pattern_groups": len(pattern_groups)
|
| 195 |
+
}
|
| 196 |
+
else:
|
| 197 |
+
pattern_groups = []
|
| 198 |
+
results['phases']['search'] = {"message": "No files to modernize"}
|
| 199 |
+
|
| 200 |
+
# Phase 4: Analysis and transformation
|
| 201 |
+
logger.info("Phase 4: Code transformation")
|
| 202 |
+
update_progress("Phase 4", "Analyzing and transforming code...")
|
| 203 |
+
|
| 204 |
+
transformations = []
|
| 205 |
+
|
| 206 |
+
# Use intelligent pattern data if available
|
| 207 |
+
if self.use_intelligent_matcher and file_contents:
|
| 208 |
+
logger.info("Using intelligent pattern analysis for transformation")
|
| 209 |
+
|
| 210 |
+
# Get prioritized files from intelligent matcher
|
| 211 |
+
prioritized = self.pattern_integration.pattern_matcher.prioritize_files(analyses)
|
| 212 |
+
|
| 213 |
+
# Process top priority files
|
| 214 |
+
files_to_transform = [
|
| 215 |
+
(fp, analysis) for fp, analysis in prioritized
|
| 216 |
+
if analysis.requires_modernization
|
| 217 |
+
][:10] # Limit to 10 files for demo
|
| 218 |
+
|
| 219 |
+
logger.info(f"Processing {len(files_to_transform)} high-priority files with detailed pattern data")
|
| 220 |
+
|
| 221 |
+
total_files = len(files_to_transform)
|
| 222 |
+
for idx, (file_path, file_analysis) in enumerate(files_to_transform, 1):
|
| 223 |
+
try:
|
| 224 |
+
update_progress("Phase 4", f"Transforming file {idx}/{total_files}: {Path(file_path).name}")
|
| 225 |
+
|
| 226 |
+
original_code = file_contents.get(file_path, "")
|
| 227 |
+
if not original_code:
|
| 228 |
+
continue
|
| 229 |
+
|
| 230 |
+
# Convert intelligent pattern analysis to transformation plan
|
| 231 |
+
transformation_plan = self.pattern_integration.get_transformation_plan(file_analysis)
|
| 232 |
+
|
| 233 |
+
# Transform using detailed pattern information
|
| 234 |
+
modernized_code = await self.transformer.transform_code(
|
| 235 |
+
file_path,
|
| 236 |
+
original_code,
|
| 237 |
+
transformation_plan
|
| 238 |
+
)
|
| 239 |
+
|
| 240 |
+
transformations.append({
|
| 241 |
+
"file_path": file_path,
|
| 242 |
+
"original_code": original_code,
|
| 243 |
+
"modernized_code": modernized_code,
|
| 244 |
+
"analysis": transformation_plan,
|
| 245 |
+
"patterns_addressed": [p['pattern'] for p in transformation_plan['steps']],
|
| 246 |
+
"pattern_details": file_analysis.patterns # Include detailed pattern info
|
| 247 |
+
})
|
| 248 |
+
|
| 249 |
+
except Exception as e:
|
| 250 |
+
logger.error(f"Error transforming {file_path}: {e}")
|
| 251 |
+
results['errors'].append(f"Transformation error for {file_path}: {e}")
|
| 252 |
+
else:
|
| 253 |
+
# Fallback to legacy pattern grouping
|
| 254 |
+
logger.info("Using legacy pattern grouping for transformation")
|
| 255 |
+
|
| 256 |
+
file_to_patterns = {}
|
| 257 |
+
for group in pattern_groups[:5]: # Limit to 5 groups for demo
|
| 258 |
+
for file_path in group['files'][:3]:
|
| 259 |
+
if file_path not in file_to_patterns:
|
| 260 |
+
file_to_patterns[file_path] = []
|
| 261 |
+
file_to_patterns[file_path].append(group['pattern_name'])
|
| 262 |
+
|
| 263 |
+
logger.info(f"Processing {len(file_to_patterns)} unique files")
|
| 264 |
+
|
| 265 |
+
total_files = len(file_to_patterns)
|
| 266 |
+
for idx, (file_path, patterns) in enumerate(file_to_patterns.items(), 1):
|
| 267 |
+
try:
|
| 268 |
+
update_progress("Phase 4", f"Transforming file {idx}/{total_files}: {Path(file_path).name}")
|
| 269 |
+
|
| 270 |
+
full_path = os.path.join(extract_path, file_path)
|
| 271 |
+
original_code = self.file_handler.read_file(full_path)
|
| 272 |
+
|
| 273 |
+
if not original_code:
|
| 274 |
+
continue
|
| 275 |
+
|
| 276 |
+
# Analyze patterns
|
| 277 |
+
combined_pattern = " AND ".join(patterns)
|
| 278 |
+
analysis = await self.analyzer.analyze_pattern(
|
| 279 |
+
[file_path],
|
| 280 |
+
combined_pattern,
|
| 281 |
+
{file_path: original_code}
|
| 282 |
+
)
|
| 283 |
+
|
| 284 |
+
# Transform file
|
| 285 |
+
modernized_code = await self.transformer.transform_code(
|
| 286 |
+
file_path,
|
| 287 |
+
original_code,
|
| 288 |
+
analysis
|
| 289 |
+
)
|
| 290 |
+
|
| 291 |
+
transformations.append({
|
| 292 |
+
"file_path": file_path,
|
| 293 |
+
"original_code": original_code,
|
| 294 |
+
"modernized_code": modernized_code,
|
| 295 |
+
"analysis": analysis,
|
| 296 |
+
"patterns_addressed": patterns
|
| 297 |
+
})
|
| 298 |
+
|
| 299 |
+
except Exception as e:
|
| 300 |
+
logger.error(f"Error transforming {file_path}: {e}")
|
| 301 |
+
results['errors'].append(f"Transformation error for {file_path}: {e}")
|
| 302 |
+
|
| 303 |
+
logger.info(f"Transformed {len(transformations)} files")
|
| 304 |
+
|
| 305 |
+
# Save transformed files to output directory
|
| 306 |
+
output_dir = Path("modernized_output")
|
| 307 |
+
output_dir.mkdir(exist_ok=True)
|
| 308 |
+
|
| 309 |
+
for t in transformations:
|
| 310 |
+
try:
|
| 311 |
+
# Create subdirectories if needed
|
| 312 |
+
output_file = output_dir / t['file_path']
|
| 313 |
+
output_file.parent.mkdir(parents=True, exist_ok=True)
|
| 314 |
+
|
| 315 |
+
# Save modernized code
|
| 316 |
+
output_file.write_text(t['modernized_code'])
|
| 317 |
+
logger.info(f"Saved: {output_file}")
|
| 318 |
+
|
| 319 |
+
# Also save original for comparison
|
| 320 |
+
original_file = output_dir / "original" / t['file_path']
|
| 321 |
+
original_file.parent.mkdir(parents=True, exist_ok=True)
|
| 322 |
+
original_file.write_text(t['original_code'])
|
| 323 |
+
|
| 324 |
+
except Exception as e:
|
| 325 |
+
logger.error(f"Error saving {t['file_path']}: {e}")
|
| 326 |
+
|
| 327 |
+
logger.info(f"Output saved to: {output_dir.absolute()}")
|
| 328 |
+
|
| 329 |
+
results['phases']['transformation'] = {
|
| 330 |
+
"files_transformed": len(transformations),
|
| 331 |
+
"output_directory": str(output_dir.absolute())
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
# Store transformations for zip file creation
|
| 335 |
+
results['transformations'] = transformations
|
| 336 |
+
|
| 337 |
+
# Phase 5: Test generation and validation
|
| 338 |
+
logger.info("Phase 5: Test generation and validation")
|
| 339 |
+
update_progress("Phase 5", "Generating tests and validating in Modal sandbox...")
|
| 340 |
+
|
| 341 |
+
validation_results = []
|
| 342 |
+
|
| 343 |
+
# Create tests directory
|
| 344 |
+
tests_dir = output_dir / "tests"
|
| 345 |
+
tests_dir.mkdir(exist_ok=True)
|
| 346 |
+
|
| 347 |
+
total_tests = min(len(transformations), 10)
|
| 348 |
+
for idx, t in enumerate(transformations[:10], 1): # Limit to 10 for demo
|
| 349 |
+
try:
|
| 350 |
+
# Update progress
|
| 351 |
+
update_progress("Phase 5", f"Testing file {idx}/{total_tests}: {Path(t['file_path']).name}")
|
| 352 |
+
|
| 353 |
+
# Generate tests
|
| 354 |
+
tests = self.test_generator.generate_tests(
|
| 355 |
+
t['original_code'],
|
| 356 |
+
t['modernized_code'],
|
| 357 |
+
t['file_path']
|
| 358 |
+
)
|
| 359 |
+
|
| 360 |
+
# Validate and auto-fix export issues
|
| 361 |
+
if tests:
|
| 362 |
+
from src.agents.code_validator import validate_and_fix_code
|
| 363 |
+
|
| 364 |
+
# Detect language from file extension
|
| 365 |
+
file_ext = Path(t['file_path']).suffix.lower()
|
| 366 |
+
language_map = {
|
| 367 |
+
'.ts': 'typescript',
|
| 368 |
+
'.js': 'javascript',
|
| 369 |
+
'.py': 'python',
|
| 370 |
+
'.java': 'java'
|
| 371 |
+
}
|
| 372 |
+
language = language_map.get(file_ext, 'unknown')
|
| 373 |
+
|
| 374 |
+
# Validate and fix
|
| 375 |
+
fixed_code, is_valid, issues = validate_and_fix_code(
|
| 376 |
+
t['modernized_code'],
|
| 377 |
+
tests,
|
| 378 |
+
language
|
| 379 |
+
)
|
| 380 |
+
|
| 381 |
+
if not is_valid:
|
| 382 |
+
logger.warning(f"Code validation issues for {t['file_path']}: {issues}")
|
| 383 |
+
|
| 384 |
+
if fixed_code != t['modernized_code']:
|
| 385 |
+
logger.info(f"Auto-fixed export issues in {t['file_path']}")
|
| 386 |
+
t['modernized_code'] = fixed_code
|
| 387 |
+
|
| 388 |
+
# Re-save the fixed source file
|
| 389 |
+
output_file = output_dir / Path(t['file_path']).name
|
| 390 |
+
output_file.write_text(fixed_code)
|
| 391 |
+
|
| 392 |
+
# Save test file
|
| 393 |
+
if tests:
|
| 394 |
+
test_file = tests_dir / f"test_{Path(t['file_path']).name}"
|
| 395 |
+
test_file.write_text(tests)
|
| 396 |
+
logger.info(f"Saved test: {test_file}")
|
| 397 |
+
|
| 398 |
+
# Validate in sandbox
|
| 399 |
+
validation = self.validator.validate_transformation(
|
| 400 |
+
t['original_code'],
|
| 401 |
+
t['modernized_code'],
|
| 402 |
+
tests,
|
| 403 |
+
file_path=t['file_path']
|
| 404 |
+
)
|
| 405 |
+
|
| 406 |
+
validation['file_path'] = t['file_path']
|
| 407 |
+
validation_results.append(validation)
|
| 408 |
+
|
| 409 |
+
except Exception as e:
|
| 410 |
+
logger.error(f"Error validating {t['file_path']}: {e}")
|
| 411 |
+
results['errors'].append(f"Validation error: {e}")
|
| 412 |
+
|
| 413 |
+
# Calculate aggregate test results
|
| 414 |
+
total_tests = sum(v.get('tests_run', 0) for v in validation_results)
|
| 415 |
+
total_passed = sum(v.get('tests_passed', 0) for v in validation_results)
|
| 416 |
+
# Fix: Only average coverage for files that have coverage data
|
| 417 |
+
coverage_values = [v.get('coverage_percent', 0) for v in validation_results if v.get('coverage_percent', 0) > 0]
|
| 418 |
+
avg_coverage = sum(coverage_values) / len(coverage_values) if coverage_values else 0.0
|
| 419 |
+
|
| 420 |
+
logger.info(f"Validation: {total_passed}/{total_tests} tests passed, {avg_coverage:.1f}% coverage")
|
| 421 |
+
|
| 422 |
+
results['phases']['validation'] = {
|
| 423 |
+
"files_validated": len(validation_results),
|
| 424 |
+
"total_tests": total_tests,
|
| 425 |
+
"tests_passed": total_passed,
|
| 426 |
+
"tests_failed": total_tests - total_passed,
|
| 427 |
+
"average_coverage": round(avg_coverage, 2),
|
| 428 |
+
"pass_rate": round(total_passed / max(total_tests, 1) * 100, 2)
|
| 429 |
+
}
|
| 430 |
+
|
| 431 |
+
# Phase 5b: GitHub PR creation (optional)
|
| 432 |
+
if create_pr and repo_url:
|
| 433 |
+
logger.info("Phase 5b: Creating GitHub PR")
|
| 434 |
+
|
| 435 |
+
# Lazy load GitHub client
|
| 436 |
+
if self.github_client is None:
|
| 437 |
+
from src.mcp.github_client import GitHubMCPClient
|
| 438 |
+
self.github_client = GitHubMCPClient(github_token=github_token)
|
| 439 |
+
|
| 440 |
+
# Prepare changed files
|
| 441 |
+
changed_files = {
|
| 442 |
+
t['file_path']: t['modernized_code']
|
| 443 |
+
for t in transformations
|
| 444 |
+
}
|
| 445 |
+
|
| 446 |
+
# Generate PR summary
|
| 447 |
+
pr_summary = self._generate_pr_summary(results, target_version)
|
| 448 |
+
|
| 449 |
+
# Create PR
|
| 450 |
+
pr_result = await self.github_client.create_pr(
|
| 451 |
+
repo_url=repo_url,
|
| 452 |
+
changed_files=changed_files,
|
| 453 |
+
pr_summary=pr_summary,
|
| 454 |
+
test_results=results['phases']['validation']
|
| 455 |
+
)
|
| 456 |
+
|
| 457 |
+
results['phases']['github_pr'] = pr_result
|
| 458 |
+
logger.info(f"PR creation: {pr_result.get('success', False)}")
|
| 459 |
+
|
| 460 |
+
# Calculate final statistics
|
| 461 |
+
results['statistics'] = {
|
| 462 |
+
"total_files": len(files),
|
| 463 |
+
"files_modernized": len(transformations),
|
| 464 |
+
"tests_generated": total_tests,
|
| 465 |
+
"test_pass_rate": round(total_passed / max(total_tests, 1) * 100, 2),
|
| 466 |
+
"average_coverage": round(avg_coverage, 2)
|
| 467 |
+
}
|
| 468 |
+
|
| 469 |
+
# Add output locations
|
| 470 |
+
results['output'] = {
|
| 471 |
+
"modernized_files": str(output_dir.absolute()),
|
| 472 |
+
"original_files": str((output_dir / "original").absolute()),
|
| 473 |
+
"test_files": str((output_dir / "tests").absolute())
|
| 474 |
+
}
|
| 475 |
+
|
| 476 |
+
results['success'] = True
|
| 477 |
+
logger.info("Modernization workflow completed successfully")
|
| 478 |
+
logger.info(f"📁 Modernized files: {output_dir.absolute()}")
|
| 479 |
+
logger.info(f"📁 Test files: {output_dir / 'tests'}")
|
| 480 |
+
|
| 481 |
+
except Exception as e:
|
| 482 |
+
logger.error(f"Workflow error: {e}")
|
| 483 |
+
results['errors'].append(f"Workflow error: {e}")
|
| 484 |
+
results['success'] = False
|
| 485 |
+
|
| 486 |
+
return results
|
| 487 |
+
|
| 488 |
+
def _find_pattern_groups(self, files: List[str]) -> List[Dict]:
|
| 489 |
+
"""
|
| 490 |
+
Find groups of files with similar legacy patterns.
|
| 491 |
+
Detects file languages and uses appropriate pattern queries.
|
| 492 |
+
|
| 493 |
+
Args:
|
| 494 |
+
files: List of file paths
|
| 495 |
+
|
| 496 |
+
Returns:
|
| 497 |
+
List of pattern group dictionaries
|
| 498 |
+
"""
|
| 499 |
+
# Detect languages present in the files
|
| 500 |
+
languages = self._detect_languages_in_files(files)
|
| 501 |
+
|
| 502 |
+
# Build language-specific pattern queries
|
| 503 |
+
pattern_queries = self._get_pattern_queries_for_languages(languages)
|
| 504 |
+
|
| 505 |
+
groups = []
|
| 506 |
+
|
| 507 |
+
for query in pattern_queries:
|
| 508 |
+
try:
|
| 509 |
+
similar_files = self.search_engine.find_similar_patterns(query, top_k=10)
|
| 510 |
+
|
| 511 |
+
if similar_files:
|
| 512 |
+
groups.append({
|
| 513 |
+
"pattern_name": query,
|
| 514 |
+
"files": [f['file_path'] for f in similar_files],
|
| 515 |
+
"similarity_scores": [f['score'] for f in similar_files]
|
| 516 |
+
})
|
| 517 |
+
except Exception as e:
|
| 518 |
+
logger.error(f"Error searching for pattern '{query}': {e}")
|
| 519 |
+
|
| 520 |
+
return groups
|
| 521 |
+
|
| 522 |
+
def _detect_languages_in_files(self, files: List[str]) -> set:
|
| 523 |
+
"""Detect programming languages from file extensions."""
|
| 524 |
+
extension_to_language = {
|
| 525 |
+
'.py': 'python',
|
| 526 |
+
'.java': 'java',
|
| 527 |
+
'.js': 'javascript',
|
| 528 |
+
'.ts': 'typescript',
|
| 529 |
+
'.jsx': 'javascript',
|
| 530 |
+
'.tsx': 'typescript',
|
| 531 |
+
'.cpp': 'cpp',
|
| 532 |
+
'.c': 'c',
|
| 533 |
+
'.h': 'c',
|
| 534 |
+
'.cs': 'csharp',
|
| 535 |
+
'.go': 'go',
|
| 536 |
+
'.rb': 'ruby',
|
| 537 |
+
'.php': 'php',
|
| 538 |
+
'.kt': 'kotlin',
|
| 539 |
+
'.scala': 'scala',
|
| 540 |
+
'.rs': 'rust',
|
| 541 |
+
'.swift': 'swift'
|
| 542 |
+
}
|
| 543 |
+
|
| 544 |
+
languages = set()
|
| 545 |
+
for file_path in files:
|
| 546 |
+
ext = Path(file_path).suffix.lower()
|
| 547 |
+
if ext in extension_to_language:
|
| 548 |
+
languages.add(extension_to_language[ext])
|
| 549 |
+
|
| 550 |
+
return languages if languages else {'python'} # Default to Python if no recognized extensions
|
| 551 |
+
|
| 552 |
+
def _get_pattern_queries_for_languages(self, languages: set) -> List[str]:
|
| 553 |
+
"""Get pattern queries appropriate for the detected languages."""
|
| 554 |
+
# Common patterns for all languages
|
| 555 |
+
common_patterns = [
|
| 556 |
+
"Files with SQL injection vulnerabilities",
|
| 557 |
+
"Files with hardcoded credentials or secrets",
|
| 558 |
+
"Files with security vulnerabilities",
|
| 559 |
+
"Files with deprecated API usage"
|
| 560 |
+
]
|
| 561 |
+
|
| 562 |
+
# Language-specific patterns
|
| 563 |
+
language_patterns = {
|
| 564 |
+
'python': [
|
| 565 |
+
"Files using deprecated database libraries like MySQLdb",
|
| 566 |
+
"Files using Python 2 print statements",
|
| 567 |
+
"Files using deprecated urllib2 library",
|
| 568 |
+
"Files missing type hints",
|
| 569 |
+
"Files using old-style string formatting"
|
| 570 |
+
],
|
| 571 |
+
'java': [
|
| 572 |
+
"Files using deprecated Java APIs like Vector or Hashtable",
|
| 573 |
+
"Files using raw JDBC without prepared statements",
|
| 574 |
+
"Files missing try-with-resources for AutoCloseable",
|
| 575 |
+
"Files using pre-Java 8 patterns without lambdas or streams",
|
| 576 |
+
"Files using deprecated Date and Calendar APIs",
|
| 577 |
+
"Files with missing null checks or Optional usage"
|
| 578 |
+
],
|
| 579 |
+
'javascript': [
|
| 580 |
+
"Files using var instead of let or const",
|
| 581 |
+
"Files using callback patterns instead of Promises or async/await",
|
| 582 |
+
"Files using jQuery for DOM manipulation",
|
| 583 |
+
"Files with eval() usage",
|
| 584 |
+
"Files using prototype-based inheritance"
|
| 585 |
+
],
|
| 586 |
+
'typescript': [
|
| 587 |
+
"Files with excessive any type usage",
|
| 588 |
+
"Files missing strict null checks",
|
| 589 |
+
"Files using old module syntax"
|
| 590 |
+
],
|
| 591 |
+
'cpp': [
|
| 592 |
+
"Files using raw pointers instead of smart pointers",
|
| 593 |
+
"Files with manual memory management",
|
| 594 |
+
"Files using C-style casts",
|
| 595 |
+
"Files missing RAII patterns"
|
| 596 |
+
],
|
| 597 |
+
'csharp': [
|
| 598 |
+
"Files using deprecated .NET APIs",
|
| 599 |
+
"Files missing async/await patterns",
|
| 600 |
+
"Files using old collection types"
|
| 601 |
+
],
|
| 602 |
+
'go': [
|
| 603 |
+
"Files missing error handling",
|
| 604 |
+
"Files with goroutine leaks",
|
| 605 |
+
"Files missing context usage"
|
| 606 |
+
],
|
| 607 |
+
'ruby': [
|
| 608 |
+
"Files using deprecated Ruby syntax",
|
| 609 |
+
"Files missing proper error handling"
|
| 610 |
+
],
|
| 611 |
+
'php': [
|
| 612 |
+
"Files using deprecated mysql_* functions",
|
| 613 |
+
"Files missing prepared statements",
|
| 614 |
+
"Files with register_globals usage"
|
| 615 |
+
]
|
| 616 |
+
}
|
| 617 |
+
|
| 618 |
+
queries = common_patterns.copy()
|
| 619 |
+
|
| 620 |
+
for lang in languages:
|
| 621 |
+
if lang in language_patterns:
|
| 622 |
+
queries.extend(language_patterns[lang])
|
| 623 |
+
|
| 624 |
+
return queries
|
| 625 |
+
|
| 626 |
+
def _generate_pr_summary(self, results: Dict, target_version: str) -> str:
|
| 627 |
+
"""Generate PR summary from results."""
|
| 628 |
+
stats = results['statistics']
|
| 629 |
+
|
| 630 |
+
# Build coverage line only if coverage > 0
|
| 631 |
+
coverage_line = ""
|
| 632 |
+
if stats.get('average_coverage', 0) > 0:
|
| 633 |
+
coverage_line = f"**Code Coverage**: {stats['average_coverage']:.1f}%\n"
|
| 634 |
+
|
| 635 |
+
summary = f"""Automated migration to {target_version} with security fixes and performance improvements.
|
| 636 |
+
|
| 637 |
+
**Files Modernized**: {stats['files_modernized']} / {stats['total_files']}
|
| 638 |
+
**Tests Generated**: {stats['tests_generated']}
|
| 639 |
+
**Test Pass Rate**: {stats['test_pass_rate']:.1f}%
|
| 640 |
+
{coverage_line}
|
| 641 |
+
This PR includes:
|
| 642 |
+
- Syntax modernization to {target_version}
|
| 643 |
+
- Security vulnerability fixes
|
| 644 |
+
- Deprecated library replacements
|
| 645 |
+
- Comprehensive test suite
|
| 646 |
+
- Performance optimizations
|
| 647 |
+
|
| 648 |
+
All changes have been validated in an isolated sandbox environment.
|
| 649 |
+
"""
|
| 650 |
+
|
| 651 |
+
return summary
|
| 652 |
+
|
| 653 |
+
def generate_report(self, results: Dict) -> str:
|
| 654 |
+
"""
|
| 655 |
+
Generate human-readable report from results.
|
| 656 |
+
|
| 657 |
+
Args:
|
| 658 |
+
results: Workflow results dictionary
|
| 659 |
+
|
| 660 |
+
Returns:
|
| 661 |
+
Formatted report string
|
| 662 |
+
"""
|
| 663 |
+
report = []
|
| 664 |
+
report.append("=" * 60)
|
| 665 |
+
report.append("LEGACY CODE MODERNIZATION REPORT")
|
| 666 |
+
report.append("=" * 60)
|
| 667 |
+
report.append("")
|
| 668 |
+
|
| 669 |
+
if results['success']:
|
| 670 |
+
report.append("✅ Status: SUCCESS")
|
| 671 |
+
else:
|
| 672 |
+
report.append("❌ Status: FAILED")
|
| 673 |
+
|
| 674 |
+
report.append("")
|
| 675 |
+
report.append("STATISTICS:")
|
| 676 |
+
report.append("-" * 60)
|
| 677 |
+
|
| 678 |
+
stats = results.get('statistics', {})
|
| 679 |
+
for key, value in stats.items():
|
| 680 |
+
# Skip average_coverage if it's 0
|
| 681 |
+
if key == 'average_coverage' and value == 0:
|
| 682 |
+
continue
|
| 683 |
+
report.append(f" {key.replace('_', ' ').title()}: {value}")
|
| 684 |
+
|
| 685 |
+
# Add intelligent pattern statistics if available
|
| 686 |
+
classification_data = results.get('phases', {}).get('classification', {})
|
| 687 |
+
intelligent_stats = classification_data.get('intelligent_stats')
|
| 688 |
+
if intelligent_stats:
|
| 689 |
+
report.append("")
|
| 690 |
+
report.append("INTELLIGENT PATTERN ANALYSIS:")
|
| 691 |
+
report.append("-" * 60)
|
| 692 |
+
report.append(f" Patterns Detected: {intelligent_stats.get('patterns_detected', 0)}")
|
| 693 |
+
report.append(f" Average Modernization Score: {intelligent_stats.get('average_modernization_score', 0)}/100")
|
| 694 |
+
report.append(f" Total Estimated Effort: {intelligent_stats.get('total_estimated_effort_hours', 0)}h")
|
| 695 |
+
|
| 696 |
+
severity_counts = intelligent_stats.get('severity_counts', {})
|
| 697 |
+
if severity_counts:
|
| 698 |
+
report.append(" Severity Breakdown:")
|
| 699 |
+
for severity, count in severity_counts.items():
|
| 700 |
+
if count > 0:
|
| 701 |
+
report.append(f" {severity.upper()}: {count}")
|
| 702 |
+
|
| 703 |
+
report.append("")
|
| 704 |
+
report.append("PHASE RESULTS:")
|
| 705 |
+
report.append("-" * 60)
|
| 706 |
+
|
| 707 |
+
for phase, data in results.get('phases', {}).items():
|
| 708 |
+
report.append(f"\n {phase.upper()}:")
|
| 709 |
+
if isinstance(data, dict):
|
| 710 |
+
for k, v in data.items():
|
| 711 |
+
if k not in ['classifications', 'intelligent_stats']: # Skip large data
|
| 712 |
+
report.append(f" {k}: {v}")
|
| 713 |
+
|
| 714 |
+
# Add output locations
|
| 715 |
+
if results.get('output'):
|
| 716 |
+
report.append("")
|
| 717 |
+
report.append("OUTPUT LOCATIONS:")
|
| 718 |
+
report.append("-" * 60)
|
| 719 |
+
for key, path in results['output'].items():
|
| 720 |
+
report.append(f" 📁 {key.replace('_', ' ').title()}: {path}")
|
| 721 |
+
|
| 722 |
+
if results.get('errors'):
|
| 723 |
+
report.append("")
|
| 724 |
+
report.append("ERRORS:")
|
| 725 |
+
report.append("-" * 60)
|
| 726 |
+
for error in results['errors']:
|
| 727 |
+
report.append(f" ⚠️ {error}")
|
| 728 |
+
|
| 729 |
+
report.append("")
|
| 730 |
+
report.append("=" * 60)
|
| 731 |
+
|
| 732 |
+
return "\n".join(report)
|
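For reference, a minimal sketch of driving the orchestrator end to end, matching the `modernize_repository` signature and `generate_report` defined above. The archive name `legacy_repo.zip` and the progress printer are illustrative, not part of this commit.

import asyncio
from src.workflow import ModernizationOrchestrator

def on_progress(phase: str, message: str) -> None:
    # Forwarded by update_progress() inside modernize_repository
    print(f"[{phase}] {message}")

async def main() -> None:
    orchestrator = ModernizationOrchestrator(use_intelligent_matcher=True)
    results = await orchestrator.modernize_repository(
        repo_path="legacy_repo.zip",     # hypothetical ZIP; a directory path also works
        target_version="Python 3.14",
        create_pr=False,                 # set True plus repo_url/github_token for PR creation
        progress_callback=on_progress,
    )
    print(orchestrator.generate_report(results))

asyncio.run(main())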