Arif committed on
Commit 697bc47 · 1 Parent(s): 5ca944e

Updating docker model runner

backend/app/services/llm_service.py CHANGED
@@ -135,7 +135,7 @@ class LLMServiceMLX(BaseLLMService):
 
 
 class LLMServiceDockerModelRunner(BaseLLMService):
-    """Docker Model Runner implementation"""
+    """Docker Model Runner implementation - OpenAI-compatible API"""
 
     def __init__(self, model_name: str, max_tokens: int, temperature: float, docker_url: str, timeout: int = 300):
         super().__init__(model_name, max_tokens, temperature)
@@ -152,7 +152,7 @@ class LLMServiceDockerModelRunner(BaseLLMService):
         self.logger.info(f"🔄 Connecting to Docker Model Runner: {self.docker_url}")
         self.client = httpx.AsyncClient(timeout=self.timeout)
 
-        # Correct endpoint: /models (not /api/tags)
+        # OpenAI-compatible endpoint: GET /v1/models
         response = await self.client.get(f"{self.docker_url}/models")
 
         if response.status_code == 200:
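Note: the connectivity check above can be reproduced standalone. A minimal sketch, assuming the Model Runner's OpenAI-compatible base URL is what gets passed as docker_url; the example URL below, including the port and /engines/v1 prefix, is an assumption to verify against your setup:

    import asyncio

    import httpx

    # Assumed base URL; Docker Model Runner serves an OpenAI-compatible
    # API, commonly under an /engines/v1 prefix -- adjust as needed.
    DOCKER_URL = "http://model-runner.docker.internal:12434/engines/v1"

    async def list_models() -> list[str]:
        async with httpx.AsyncClient(timeout=30) as client:
            # GET /models returns {"object": "list", "data": [{"id": ...}, ...]}
            response = await client.get(f"{DOCKER_URL}/models")
            response.raise_for_status()
            return [model["id"] for model in response.json().get("data", [])]

    if __name__ == "__main__":
        print(asyncio.run(list_models()))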
@@ -173,13 +173,13 @@ class LLMServiceDockerModelRunner(BaseLLMService):
 
         try:
             payload = {
-                "model": self.model_name,
+                "model": self.model_name,  # "ai/llama3.2:1B-Q4_0"
                 "messages": [{"role": "user", "content": prompt}],
                 "temperature": self.temperature,
                 "max_tokens": self.max_tokens,
             }
 
-            # Correct endpoint: /chat/completions
+            # OpenAI-compatible endpoint: POST /v1/chat/completions
             response = await self.client.post(
                 f"{self.docker_url}/chat/completions",
                 json=payload
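Note: the request is a plain OpenAI chat-completions call. A standalone sketch of the same call; the model tag and base URL are examples, not verified values:

    import asyncio

    import httpx

    DOCKER_URL = "http://model-runner.docker.internal:12434/engines/v1"  # assumed
    MODEL = "ai/llama3.2:1B-Q4_0"  # example tag from the diff comment

    async def chat(prompt: str) -> str:
        payload = {
            "model": MODEL,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.7,
            "max_tokens": 256,
        }
        async with httpx.AsyncClient(timeout=300) as client:
            response = await client.post(f"{DOCKER_URL}/chat/completions", json=payload)
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"]

    if __name__ == "__main__":
        print(asyncio.run(chat("Say hello.")))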
@@ -187,7 +187,7 @@ class LLMServiceDockerModelRunner(BaseLLMService):
 
         if response.status_code == 200:
             result = response.json()
-            return result["choices"]["message"]["content"]
+            return result["choices"][0]["message"]["content"]
         else:
             self.logger.error(f"❌ Docker Model Runner error: {response.status_code} - {response.text}")
             raise RuntimeError(f"Model Runner error: {response.status_code}")
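Note: this hunk fixes the actual bug. In an OpenAI-style response, "choices" is a list, so result["choices"]["message"] raised a TypeError. An abridged sketch of the standard response shape:

    # Abridged OpenAI-style chat completion response; fields trimmed.
    result = {
        "object": "chat.completion",
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": "Hello!"},
                "finish_reason": "stop",
            }
        ],
    }
    # "choices" is a list of candidates, hence the [0] before "message".
    assert result["choices"][0]["message"]["content"] == "Hello!"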
@@ -204,6 +204,7 @@ class LLMServiceDockerModelRunner(BaseLLMService):
 
 
 
+
 class LLMServiceMock(BaseLLMService):
     """Mock implementation as fallback"""
 
 
docker-compose.yml CHANGED
@@ -14,6 +14,8 @@ services:
     networks:
       - llm-network
     hostname: backend
+    extra_hosts:
+      - "model-runner.docker.internal:host-gateway"
 
   frontend:
     build:
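Note: host-gateway resolves to the Docker host's gateway IP inside the backend container, so model-runner.docker.internal now points at the host and the service can reach a Model Runner listening there. A sketch of the resulting wiring; the import path follows the repo layout (backend/app/services/llm_service.py), and the port and /engines/v1 prefix are assumed defaults for Docker Model Runner's host-side TCP endpoint:

    from app.services.llm_service import LLMServiceDockerModelRunner

    # Hostname resolves via the extra_hosts mapping above; port and path
    # are assumptions -- verify against your Model Runner configuration.
    service = LLMServiceDockerModelRunner(
        model_name="ai/llama3.2:1B-Q4_0",
        max_tokens=512,
        temperature=0.7,
        docker_url="http://model-runner.docker.internal:12434/engines/v1",
    )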
 