# ./core_logic_local.py
"""
Max Tokens:
Increased for the local version. Since neither token costs nor cloud timeouts apply,
the Architect can:
  - handle longer file contexts,
  - perform thorough code review,
  - write deeper code analysis,
  - produce comprehensive solutions.

/v1 Necessity:
The /v1 suffix is essential in the base_url for the OpenAI library to route requests
correctly to Ollama's API, even though Chrome shows the "Ollama is running" message at
http://127.0.0.1:11434, i.e. without "/v1".

"First Principles" breakdown of why this is necessary:

1. The Browser vs. The API
   Visiting 127.0.0.1:11434 in Chrome hits the base URL, and Ollama sends back that
   simple text message just to confirm the service is alive. Python code, however,
   does not merely check whether Ollama is alive; it tries to have a conversation,
   and for that it needs to talk to a specific endpoint (a specific door in the building).

2. OpenAI Compatibility (The Industry Standard)
   Ollama was designed to be a "drop-in replacement" for OpenAI. Almost every AI
   library (like the openai Python library) expects the standard URL structure of the
   OpenAI Chat Completions API:
     Base URL:        http://localhost:11434
     Version prefix:  /v1
     Action:          /chat/completions
   When base_url='http://localhost:11434/v1' is set, the OpenAI library automatically
   appends /chat/completions to it.

3. What happens if "/v1" is removed?
   The library tries to send the data to http://localhost:11434/chat/completions, but
   because that URL is missing the "/v1" prefix, Ollama's OpenAI compatibility layer
   does not recognize the request, and a "404 Not Found" or "405 Method Not Allowed"
   may be returned.

Summary Checklist:
  - In Chrome:      use 127.0.0.1:11434     to see if Ollama is up and running.
  - In Python code: use 127.0.0.1:11434/v1  to actually send prompts.
"""
from openai import OpenAI
from tools import web_search, parse_file
import os
import socket


def get_base_url():
    # Check if we are inside WSL
    if os.path.exists('/proc/version'):
        with open('/proc/version', 'r') as f:
            if 'microsoft' in f.read().lower():
                # Running from inside the Ubuntu (WSL) terminal: point to the Windows host
                return "http://172.17.0.1:11434/v1"
    # Otherwise assume we are on the native Windows host (PowerShell/CMD) and point to localhost
    return "http://127.0.0.1:11434/v1"


# Ollama serves an OpenAI-compatible API locally at port 11434
client = OpenAI(
    base_url=get_base_url(),
    api_key='ollama',  # Required by the library but ignored by Ollama
)

"""
client = OpenAI(
    base_url='http://localhost:11434/v1',
    api_key="ollama"
)
"""

# Use a local model served by Ollama. Make sure the server is running (`ollama serve`)
# and the chosen model has been pulled (e.g. `ollama pull llama3`).
#model = "gemma4:latest"
model = "llama3:latest"  # better than llama3.2:latest and phi3:latest
#model = "llama3.2:latest"
#model = "phi3:latest"
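# --- Optional connectivity check (illustrative sketch, not part of the original logic) ---
# The docstring above explains why base_url must end in /v1. A quick way to confirm the
# routing works is to hit Ollama's OpenAI-compatible model listing endpoint via
# client.models.list() (GET {base_url}/models). The helper below is an added example and
# is never called by this module; run it manually from a REPL if you want a sanity check.
def verify_ollama_connection():
    """Return the model ids Ollama exposes through its OpenAI-compatible /v1 API."""
    try:
        models = client.models.list()
        return [m.id for m in models.data]
    except Exception as e:
        # Typically a connection error if `ollama serve` is not running,
        # or a 404 if "/v1" is missing from base_url.
        return f"Ollama connectivity check failed: {e}"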
SYSTEM_PROMPT = """
You are the 'Silicon Architect' — a full-stack, master-stroke creative genius in AI Engineering and Technical Architecture.
Your goal is to provide production-grade, highly optimized solutions for web and mobile AI applications.
Expertise: Python (latest production version), Agentic Loops, FastAPI, and Scalable Architecture.
Provide production-ready code and rigorous technical research with appropriate comments.
Analyze files when provided. Be concise.

CORE DIRECTIVES:
1. ARCHITECTURAL RIGOR: Always consider scalability, async patterns, and state management.
2. AGENTIC EXPERTISE: You understand recurrent-depth simulations, tool-calling, and autonomous loops.
3. CODE QUALITY: Write clean, PEP 8 compliant, and secure Python/JS code.
4. INNOVATION: Suggest the latest libraries and frameworks (FastAPI, LangGraph, Pydantic AI; but not limited to these).
5. RESEARCH: If the user asks about new tech, use your Web Search capability to provide factual, up-to-date documentation.

PERSONALITY:
1. FRANK/POLITE: Disagree with the user if needed; never resort to sycophancy, and suggest better alternatives.
2. HUMBLE: Apologize when mistaken.
3. FIRST PRINCIPLES: Base your responses and reasoning on Richard Feynman's first-principles thinking.
   Break down complex problems into fundamental truths and reason up from there.

When a user provides files, analyze the code structure and logic before proposing changes.
"""


def chat_function(message, history):
    user_text = message.get("text", "")
    files = message.get("files", [])

    # Concatenate the contents of any attached files into a single context block
    context_from_files = ""
    for f in files:
        path = f["path"] if isinstance(f, dict) else f
        file_content = parse_file(path)
        context_from_files += file_content

    # Truncate oversized file context to keep the prompt within the model's context window
    if len(context_from_files) > 12000:
        context_from_files = context_from_files[:12000] + "\n...[File Content Truncated]..."

    # Only trigger a web search when the user signals a research intent
    if any(keyword in user_text.lower() for keyword in ["search", "docs", "latest"]):
        research_context = web_search(user_text)
        prompt = f"RESEARCH:\n{research_context}\n\nFILES:\n{context_from_files}\n\nUSER: {user_text}"
    else:
        prompt = f"FILES:\n{context_from_files}\n\nUSER: {user_text}"

    # Keep only the last three turns of history to bound the prompt size
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in history[-3:]:
        messages.append({"role": turn["role"], "content": turn["content"]})
    messages.append({"role": "user", "content": prompt})

    try:
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
            temperature=0.2,  # Slight temperature for creative architecture
            max_tokens=2048   # Local power allows for longer responses
        )

        # Stream tokens back as they arrive, yielding the accumulated response each time
        response_text = ""
        for chunk in completion:
            if chunk.choices and hasattr(chunk.choices[0].delta, 'content'):
                token = chunk.choices[0].delta.content
                if token:
                    response_text += token
                    yield response_text
    except Exception as e:
        yield f"Local Architect Error: {str(e)}"
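# --- Illustrative UI wiring (assumption: this module is driven by a Gradio chat UI) ---
# chat_function's contract (a dict with "text" and "files", a messages-style history,
# and a generator that yields the growing response) matches Gradio's multimodal
# ChatInterface. The actual app file is not shown here, so the block below is only a
# hypothetical usage sketch, guarded so importing this module stays side-effect free.
if __name__ == "__main__":
    import gradio as gr  # assumed dependency for this sketch

    demo = gr.ChatInterface(
        fn=chat_function,
        multimodal=True,   # allows file attachments alongside the text box
        type="messages",   # history arrives as [{"role": ..., "content": ...}, ...]
        title="Silicon Architect (Local / Ollama)",
    )
    demo.launch()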