# ./core_logic_local.py
"""
Max Tokens:
Increased for the local version. Since neither token costs nor cloud timeouts apply,
the Architect can:
  - handle longer file contexts,
  - perform thorough code review,
  - write deeper code analysis,
  - produce comprehensive solutions.

/v1 Necessity:
The /v1 suffix is essential in the base_url for the OpenAI library to route requests
correctly to Ollama's API, even though Chrome shows the "Ollama is running" message at
http://127.0.0.1:11434, i.e. without "/v1".

"First Principles" breakdown of why this is necessary:

1. The Browser vs. The API
   Visiting 127.0.0.1:11434 in Chrome hits the base URL, and Ollama sends back that
   simple text message just to confirm the service is alive. Python code, however,
   does not merely check whether Ollama is alive; it tries to have a conversation,
   and for that it needs to talk to a specific endpoint (a specific door in the building).

2. OpenAI Compatibility (The Industry Standard)
   Ollama was designed to be a "drop-in replacement" for OpenAI. Almost every AI
   library (like the openai Python library) expects the standard URL structure of the
   OpenAI Chat Completions API:
     Base URL:        http://localhost:11434
     Version prefix:  /v1
     Action:          /chat/completions
   When base_url='http://localhost:11434/v1' is set, the OpenAI library automatically
   appends /chat/completions to it.

3. What happens if "/v1" is removed?
   The library tries to send the data to http://localhost:11434/chat/completions, but
   because that URL is missing the "/v1" prefix, Ollama's OpenAI compatibility layer
   does not recognize the request, and a "404 Not Found" or "405 Method Not Allowed"
   may be returned.

Summary Checklist:
  - In Chrome:      use 127.0.0.1:11434     to see if Ollama is up and running.
  - In Python code: use 127.0.0.1:11434/v1  to actually send prompts.
"""
from openai import OpenAI
from tools import web_search, parse_file
import os
import socket


def get_base_url():
    # Check if we are inside WSL
    if os.path.exists('/proc/version'):
        with open('/proc/version', 'r') as f:
            if 'microsoft' in f.read().lower():
                # Running from inside the Ubuntu (WSL) terminal: point to the Windows host
                return "http://172.17.0.1:11434/v1"
    # Otherwise assume we are on the native Windows host (PowerShell/CMD) and point to localhost
    return "http://127.0.0.1:11434/v1"


# Ollama serves an OpenAI-compatible API locally at port 11434
client = OpenAI(
    base_url=get_base_url(),
    api_key='ollama',  # Required by the library but ignored by Ollama
)

"""
client = OpenAI(
    base_url='http://localhost:11434/v1',
    api_key="ollama"
)
"""

# Use a local model served by Ollama. Make sure the server is running (`ollama serve`)
# and the chosen model has been pulled (e.g. `ollama pull llama3`).
#model = "gemma4:latest"
model = "llama3:latest"  # better than llama3.2:latest and phi3:latest
#model = "llama3.2:latest"
#model = "phi3:latest"
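# --- Optional connectivity check (illustrative sketch, not part of the original logic) ---
# The docstring above explains why base_url must end in /v1. A quick way to confirm the
# routing works is to hit Ollama's OpenAI-compatible model listing endpoint via
# client.models.list() (GET {base_url}/models). The helper below is an added example and
# is never called by this module; run it manually from a REPL if you want a sanity check.
def verify_ollama_connection():
    """Return the model ids Ollama exposes through its OpenAI-compatible /v1 API."""
    try:
        models = client.models.list()
        return [m.id for m in models.data]
    except Exception as e:
        # Typically a connection error if `ollama serve` is not running,
        # or a 404 if "/v1" is missing from base_url.
        return f"Ollama connectivity check failed: {e}"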
SYSTEM_PROMPT = """
You are the 'Silicon Architect' — a full-stack, master-stroke creative genius in AI Engineering and Technical Architecture.
Your goal is to provide production-grade, highly optimized solutions for web and mobile AI applications.
Expertise: Python (latest production version), Agentic Loops, FastAPI, and Scalable Architecture.
Provide production-ready code and rigorous technical research with appropriate comments.
Analyze files when provided. Be concise.

CORE DIRECTIVES:
1. ARCHITECTURAL RIGOR: Always consider scalability, async patterns, and state management.
2. AGENTIC EXPERTISE: You understand recurrent-depth simulations, tool-calling, and autonomous loops.
3. CODE QUALITY: Write clean, PEP 8 compliant, and secure Python/JS code.
4. INNOVATION: Suggest the latest libraries and frameworks (FastAPI, LangGraph, Pydantic AI; but not limited to these).
5. RESEARCH: If the user asks about new tech, use your Web Search capability to provide factual, up-to-date documentation.

PERSONALITY:
1. FRANK/POLITE: Disagree with the user if needed; never resort to sycophancy, and suggest better alternatives.
2. HUMBLE: Apologize when mistaken.
3. FIRST PRINCIPLES: Base your responses and reasoning on Richard Feynman's first-principles thinking.
   Break down complex problems into fundamental truths and reason up from there.

When a user provides files, analyze the code structure and logic before proposing changes.
"""


def chat_function(message, history):
    user_text = message.get("text", "")
    files = message.get("files", [])

    # Concatenate the contents of any attached files into a single context block
    context_from_files = ""
    for f in files:
        path = f["path"] if isinstance(f, dict) else f
        file_content = parse_file(path)
        context_from_files += file_content

    # Truncate oversized file context to keep the prompt within the model's context window
    if len(context_from_files) > 12000:
        context_from_files = context_from_files[:12000] + "\n...[File Content Truncated]..."

    # Only trigger a web search when the user signals a research intent
    if any(keyword in user_text.lower() for keyword in ["search", "docs", "latest"]):
        research_context = web_search(user_text)
        prompt = f"RESEARCH:\n{research_context}\n\nFILES:\n{context_from_files}\n\nUSER: {user_text}"
    else:
        prompt = f"FILES:\n{context_from_files}\n\nUSER: {user_text}"

    # Keep only the last three turns of history to bound the prompt size
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in history[-3:]:
        messages.append({"role": turn["role"], "content": turn["content"]})
    messages.append({"role": "user", "content": prompt})

    try:
        completion = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
            temperature=0.2,  # Slight temperature for creative architecture
            max_tokens=2048   # Local power allows for longer responses
        )

        # Stream tokens back as they arrive, yielding the accumulated response each time
        response_text = ""
        for chunk in completion:
            if chunk.choices and hasattr(chunk.choices[0].delta, 'content'):
                token = chunk.choices[0].delta.content
                if token:
                    response_text += token
                    yield response_text
    except Exception as e:
        yield f"Local Architect Error: {str(e)}"
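# --- Illustrative UI wiring (assumption: this module is driven by a Gradio chat UI) ---
# chat_function's contract (a dict with "text" and "files", a messages-style history,
# and a generator that yields the growing response) matches Gradio's multimodal
# ChatInterface. The actual app file is not shown here, so the block below is only a
# hypothetical usage sketch, guarded so importing this module stays side-effect free.
if __name__ == "__main__":
    import gradio as gr  # assumed dependency for this sketch

    demo = gr.ChatInterface(
        fn=chat_function,
        multimodal=True,   # allows file attachments alongside the text box
        type="messages",   # history arrives as [{"role": ..., "content": ...}, ...]
        title="Silicon Architect (Local / Ollama)",
    )
    demo.launch()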